Initial commit

2025-06-28 06:25:24 +02:00 · 2020-06-12 01:42:08 -07:00 · 2020-06-12 01:42:08 -07:00 · 3de923156c
commit 3de923156c
parent 2ba6dbe7cc
32 changed files with 4054 additions and 1 deletions
--- a/models/Old/densenet.py
+++ b/models/Old/densenet.py
@ -0,0 +1,107 @@
+'''DenseNet in PyTorch.'''
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Bottleneck(nn.Module):
+    '''DenseNet bottleneck layer: BN-ReLU-1x1 conv followed by BN-ReLU-3x3 conv.
+
+    The 3x3 conv emits ``growth_rate`` feature maps, which are concatenated in
+    front of the layer input along the channel axis, so the output carries
+    ``growth_rate + in_planes`` channels at the input's spatial size.
+    '''
+
+    def __init__(self, in_planes, growth_rate):
+        super(Bottleneck, self).__init__()
+        inner_planes = 4 * growth_rate
+        self.bn1 = nn.BatchNorm2d(in_planes)
+        self.conv1 = nn.Conv2d(in_planes, inner_planes, kernel_size=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(inner_planes)
+        self.conv2 = nn.Conv2d(inner_planes, growth_rate, kernel_size=3, padding=1, bias=False)
+
+    def forward(self, x):
+        squeezed = self.conv1(F.relu(self.bn1(x)))
+        new_features = self.conv2(F.relu(self.bn2(squeezed)))
+        # New features go first, the untouched input after them.
+        return torch.cat([new_features, x], 1)
+
+
+class Transition(nn.Module):
+    '''Layer placed between dense blocks: BN-ReLU-1x1 conv, then 2x2 average pooling.
+
+    Maps ``in_planes`` channels to ``out_planes`` channels and halves the
+    spatial resolution.
+    '''
+
+    def __init__(self, in_planes, out_planes):
+        super(Transition, self).__init__()
+        self.bn = nn.BatchNorm2d(in_planes)
+        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
+
+    def forward(self, x):
+        activated = F.relu(self.bn(x))
+        return F.avg_pool2d(self.conv(activated), 2)
+
+
+class DenseNet(nn.Module):
+    '''DenseNet classifier with four dense stages.
+
+    Args:
+        block: layer class used inside the dense stages; called as
+            ``block(in_planes, growth_rate)``.
+        nblocks: number of layers in each of the four dense stages.
+        growth_rate: channels added by every layer of a dense stage.
+        reduction: channel multiplier applied by each transition
+            (the result is floored to an int).
+        num_classes: size of the final linear layer's output.
+    '''
+
+    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
+        super(DenseNet, self).__init__()
+        self.growth_rate = growth_rate
+
+        planes = 2 * growth_rate
+        self.conv1 = nn.Conv2d(3, planes, kernel_size=3, padding=1, bias=False)
+
+        # Stages 1-3: dense block followed by a channel-compressing transition.
+        # setattr keeps the dense1/trans1/... attribute names and their order.
+        for stage, depth in enumerate(nblocks[:3], 1):
+            setattr(self, 'dense%d' % stage, self._make_dense_layers(block, planes, depth))
+            planes += depth * growth_rate
+            compressed = int(math.floor(planes * reduction))
+            setattr(self, 'trans%d' % stage, Transition(planes, compressed))
+            planes = compressed
+
+        # Stage 4 has no transition; it feeds the classifier head directly.
+        self.dense4 = self._make_dense_layers(block, planes, nblocks[3])
+        planes += nblocks[3] * growth_rate
+
+        self.bn = nn.BatchNorm2d(planes)
+        self.linear = nn.Linear(planes, num_classes)
+
+    def _make_dense_layers(self, block, in_planes, nblock):
+        '''Stack ``nblock`` layers; each one sees ``growth_rate`` more input channels than the last.'''
+        stacked = [block(in_planes + idx * self.growth_rate, self.growth_rate)
+                   for idx in range(nblock)]
+        return nn.Sequential(*stacked)
+
+    def forward(self, x):
+        out = self.conv1(x)
+        stages = ((self.dense1, self.trans1),
+                  (self.dense2, self.trans2),
+                  (self.dense3, self.trans3))
+        for dense, trans in stages:
+            out = trans(dense(out))
+        out = F.relu(self.bn(self.dense4(out)))
+        out = F.avg_pool2d(out, 4)
+        flat = out.view(out.size(0), -1)
+        return self.linear(flat)
+
+def DenseNet121():
+    '''Build a DenseNet with stage depths (6, 12, 24, 16) and growth rate 32.'''
+    stage_depths = [6,12,24,16]
+    return DenseNet(Bottleneck, stage_depths, growth_rate=32)
+
+def DenseNet169():
+    '''Build a DenseNet with stage depths (6, 12, 32, 32) and growth rate 32.'''
+    stage_depths = [6,12,32,32]
+    return DenseNet(Bottleneck, stage_depths, growth_rate=32)
+
+def DenseNet201():
+    '''Build a DenseNet with stage depths (6, 12, 48, 32) and growth rate 32.'''
+    stage_depths = [6,12,48,32]
+    return DenseNet(Bottleneck, stage_depths, growth_rate=32)
+
+def DenseNet161():
+    '''Build a DenseNet with stage depths (6, 12, 36, 24) and growth rate 48.'''
+    stage_depths = [6,12,36,24]
+    return DenseNet(Bottleneck, stage_depths, growth_rate=48)
+
+def densenet_cifar():
+    '''Build a DenseNet with stage depths (6, 12, 24, 16) and growth rate 12.'''
+    stage_depths = [6,12,24,16]
+    return DenseNet(Bottleneck, stage_depths, growth_rate=12)
+
+def test():
+    '''Smoke test: run one random 3x32x32 image through densenet_cifar and print the output.'''
+    model = densenet_cifar()
+    sample = torch.randn(1,3,32,32)
+    print(model(sample))
+
+# test()
--- a/models/Old/dpn.py
+++ b/models/Old/dpn.py
@ -0,0 +1,98 @@
+'''Dual Path Networks in PyTorch.'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Bottleneck(nn.Module):
+    '''Dual-path bottleneck: 1x1 conv, grouped 3x3 conv (32 groups), 1x1 conv.
+
+    The first ``out_planes`` channels of the branch output are added to the
+    first ``out_planes`` channels of the shortcut; the remaining channels of
+    both are concatenated after that sum.
+    '''
+
+    def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
+        super(Bottleneck, self).__init__()
+        self.out_planes = out_planes
+        self.dense_depth = dense_depth
+        branch_planes = out_planes + dense_depth
+
+        self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(in_planes)
+        self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
+        self.bn2 = nn.BatchNorm2d(in_planes)
+        self.conv3 = nn.Conv2d(in_planes, branch_planes, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(branch_planes)
+
+        # Only the first block of a stage projects its input; the others use an
+        # empty Sequential, i.e. the identity.
+        projection = []
+        if first_layer:
+            projection = [
+                nn.Conv2d(last_planes, branch_planes, kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(branch_planes),
+            ]
+        self.shortcut = nn.Sequential(*projection)
+
+    def forward(self, x):
+        branch = F.relu(self.bn1(self.conv1(x)))
+        branch = F.relu(self.bn2(self.conv2(branch)))
+        branch = self.bn3(self.conv3(branch))
+        skip = self.shortcut(x)
+        split = self.out_planes
+        residual = skip[:,:split,:,:] + branch[:,:split,:,:]
+        merged = torch.cat([residual, skip[:,split:,:,:], branch[:,split:,:,:]], 1)
+        return F.relu(merged)
+
+
+class DPN(nn.Module):
+    '''Dual Path Network classifier with four stages of dual-path bottlenecks.
+
+    Args:
+        cfg: dict with 4-element sequences under the keys ``'in_planes'``,
+            ``'out_planes'``, ``'num_blocks'`` and ``'dense_depth'``
+            (one entry per stage).
+        num_classes: size of the final linear layer's output. Defaults to 10,
+            the value that used to be hard-coded.
+    '''
+
+    def __init__(self, cfg, num_classes=10):
+        super(DPN, self).__init__()
+        in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
+        num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']
+
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        # Running channel count fed to the next bottleneck; updated by _make_layer.
+        self.last_planes = 64
+        self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
+        self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
+        self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
+        self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
+        self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], num_classes)
+
+    def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
+        '''Build one stage; only its first block uses ``stride`` and a projection shortcut.'''
+        block_strides = [stride] + [1]*(num_blocks-1)
+        layers = []
+        # A separate loop name avoids shadowing the ``stride`` parameter.
+        for i, block_stride in enumerate(block_strides):
+            layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, block_stride, i==0))
+            # Every block appends dense_depth channels on top of the first block's 2*dense_depth.
+            self.last_planes = out_planes + (i+2) * dense_depth
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        out = self.layer4(out)
+        out = F.avg_pool2d(out, 4)
+        out = out.view(out.size(0), -1)
+        out = self.linear(out)
+        return out
+
+
+def DPN26():
+    '''Build a DPN with two bottleneck blocks in each of its four stages.'''
+    cfg = dict(
+        in_planes=(96,192,384,768),
+        out_planes=(256,512,1024,2048),
+        num_blocks=(2,2,2,2),
+        dense_depth=(16,32,24,128),
+    )
+    return DPN(cfg)
+
+def DPN92():
+    '''Build a DPN with (3, 4, 20, 3) bottleneck blocks in its four stages.'''
+    cfg = dict(
+        in_planes=(96,192,384,768),
+        out_planes=(256,512,1024,2048),
+        num_blocks=(3,4,20,3),
+        dense_depth=(16,32,24,128),
+    )
+    return DPN(cfg)
+
+
+def test():
+    '''Smoke test: run one random 3x32x32 image through DPN92 and print the output.'''
+    model = DPN92()
+    sample = torch.randn(1,3,32,32)
+    print(model(sample))
+
+# test()
--- a/models/Old/efficientnet.py
+++ b/models/Old/efficientnet.py
@ -0,0 +1,175 @@
+'''EfficientNet in PyTorch.
+
+Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks".
+
+Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def swish(x):
+    '''Swish activation: elementwise ``x * sigmoid(x)``.'''
+    gate = x.sigmoid()
+    return x * gate
+
+
+def drop_connect(x, drop_ratio):
+    '''Randomly zero whole samples of ``x`` and rescale the survivors.
+
+    One Bernoulli draw with keep-probability ``1 - drop_ratio`` is made per
+    entry along dim 0 and broadcast over the remaining three dims; kept
+    samples are divided by the keep-probability.
+
+    The input tensor is left untouched and a new tensor is returned (the
+    previous implementation modified ``x`` in place).
+
+    Args:
+        x: 4-D tensor; the mask has shape ``(x.shape[0], 1, 1, 1)``.
+        drop_ratio: probability of dropping a sample, in ``[0, 1]``.
+
+    Returns:
+        Tensor with the same shape, dtype and device as ``x``.
+    '''
+    keep_ratio = 1.0 - drop_ratio
+    if keep_ratio <= 0:
+        # Everything is dropped; dividing by zero here would yield NaNs.
+        return torch.zeros_like(x)
+    mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
+    mask.bernoulli_(keep_ratio)
+    return x.div(keep_ratio).mul(mask)
+
+
+class SE(nn.Module):
+    '''Squeeze-and-Excitation block with Swish.
+
+    Global-average-pools the input to 1x1, passes it through two 1x1 convs
+    (Swish after the first, sigmoid after the second) and scales the input
+    channelwise by the result.
+    '''
+
+    def __init__(self, in_channels, se_channels):
+        super(SE, self).__init__()
+        self.se1 = nn.Conv2d(in_channels, se_channels, kernel_size=1, bias=True)
+        self.se2 = nn.Conv2d(se_channels, in_channels, kernel_size=1, bias=True)
+
+    def forward(self, x):
+        squeezed = F.adaptive_avg_pool2d(x, (1, 1))
+        excited = swish(self.se1(squeezed))
+        gate = self.se2(excited).sigmoid()
+        return x * gate
+
+
+class Block(nn.Module):
+    '''expansion + depthwise + pointwise + squeeze-excitation
+
+    The 1x1 expansion conv is skipped in ``forward`` when ``expand_ratio`` is 1.
+    A residual connection is used when ``stride == 1`` and the channel count
+    is unchanged; in training mode the branch is passed through
+    ``drop_connect`` first if ``drop_rate`` is positive.
+    '''
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 expand_ratio=1,
+                 se_ratio=0.,
+                 drop_rate=0.):
+        super(Block, self).__init__()
+        self.stride = stride
+        self.drop_rate = drop_rate
+        self.expand_ratio = expand_ratio
+
+        expanded = expand_ratio * in_channels
+        # 3x3 kernels pad by 1; every other kernel size pads by 2.
+        depthwise_padding = 1 if kernel_size == 3 else 2
+
+        # Expansion (1x1)
+        self.conv1 = nn.Conv2d(in_channels, expanded, kernel_size=1,
+                               stride=1, padding=0, bias=False)
+        self.bn1 = nn.BatchNorm2d(expanded)
+
+        # Depthwise conv: one group per channel
+        self.conv2 = nn.Conv2d(expanded, expanded, kernel_size=kernel_size,
+                               stride=stride, padding=depthwise_padding,
+                               groups=expanded, bias=False)
+        self.bn2 = nn.BatchNorm2d(expanded)
+
+        # Squeeze-excitation; its width is derived from the block input channels
+        self.se = SE(expanded, int(in_channels * se_ratio))
+
+        # Pointwise projection (1x1)
+        self.conv3 = nn.Conv2d(expanded, out_channels, kernel_size=1,
+                               stride=1, padding=0, bias=False)
+        self.bn3 = nn.BatchNorm2d(out_channels)
+
+        # Skip connection if in and out shapes are the same (MV-V2 style)
+        self.has_skip = (stride == 1) and (in_channels == out_channels)
+
+    def forward(self, x):
+        if self.expand_ratio == 1:
+            out = x
+        else:
+            out = swish(self.bn1(self.conv1(x)))
+        out = swish(self.bn2(self.conv2(out)))
+        out = self.bn3(self.conv3(self.se(out)))
+        if not self.has_skip:
+            return out
+        if self.training and self.drop_rate > 0:
+            out = drop_connect(out, self.drop_rate)
+        return out + x
+
+
+class EfficientNet(nn.Module):
+    '''EfficientNet classifier assembled from a configuration dict.
+
+    Args:
+        cfg: dict with per-stage lists under ``'expansion'``,
+            ``'out_channels'``, ``'num_blocks'``, ``'kernel_size'`` and
+            ``'stride'``, plus the scalars ``'dropout_rate'`` and
+            ``'drop_connect_rate'``.
+        num_classes: size of the final linear layer's output.
+    '''
+
+    def __init__(self, cfg, num_classes=10):
+        super(EfficientNet, self).__init__()
+        self.cfg = cfg
+        self.conv1 = nn.Conv2d(3,
+                               32,
+                               kernel_size=3,
+                               stride=1,
+                               padding=1,
+                               bias=False)
+        self.bn1 = nn.BatchNorm2d(32)
+        self.layers = self._make_layers(in_channels=32)
+        self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)
+
+    def _make_layers(self, in_channels):
+        '''Build all blocks; the drop-connect rate grows linearly with the block index.'''
+        layers = []
+        cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks', 'kernel_size',
+                                     'stride']]
+        block_idx = 0
+        total_blocks = sum(self.cfg['num_blocks'])
+        for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
+            # Only the first block of a stage uses the configured stride.
+            for block_stride in [stride] + [1] * (num_blocks - 1):
+                drop_rate = self.cfg['drop_connect_rate'] * block_idx / total_blocks
+                layers.append(
+                    Block(in_channels,
+                          out_channels,
+                          kernel_size,
+                          block_stride,
+                          expansion,
+                          se_ratio=0.25,
+                          drop_rate=drop_rate))
+                in_channels = out_channels
+                # The counter was never advanced before, which pinned every
+                # block's drop_rate at 0 and disabled drop-connect entirely.
+                block_idx += 1
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        out = swish(self.bn1(self.conv1(x)))
+        out = self.layers(out)
+        out = F.adaptive_avg_pool2d(out, 1)
+        out = out.view(out.size(0), -1)
+        dropout_rate = self.cfg['dropout_rate']
+        # F.dropout defaults to training=True, so it is gated on self.training here.
+        if self.training and dropout_rate > 0:
+            out = F.dropout(out, p=dropout_rate)
+        out = self.linear(out)
+        return out
+
+
+def EfficientNetB0():
+    '''Build an EfficientNet from the seven-stage configuration below.'''
+    cfg = dict(
+        num_blocks=[1, 2, 2, 3, 3, 4, 1],
+        expansion=[1, 6, 6, 6, 6, 6, 6],
+        out_channels=[16, 24, 40, 80, 112, 192, 320],
+        kernel_size=[3, 3, 5, 3, 5, 5, 3],
+        stride=[1, 2, 2, 2, 1, 2, 1],
+        dropout_rate=0.2,
+        drop_connect_rate=0.2,
+    )
+    return EfficientNet(cfg)
+
+
+def test():
+    '''Smoke test: run two random 3x32x32 images through EfficientNetB0 and print the output shape.'''
+    model = EfficientNetB0()
+    batch = torch.randn(2, 3, 32, 32)
+    logits = model(batch)
+    print(logits.shape)
+
+
+# Run the smoke test only when this file is executed as a script.
+if __name__ == '__main__':
+    test()
--- a/models/Old/googlenet.py
+++ b/models/Old/googlenet.py
@ -0,0 +1,107 @@
+'''GoogLeNet with PyTorch.'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Inception(nn.Module):
+    '''Inception module with four parallel branches concatenated along channels.
+
+    Branches: a 1x1 conv; a 1x1 conv followed by a 3x3 conv; a 1x1 conv
+    followed by two 3x3 convs; and a 3x3 max-pool followed by a 1x1 conv.
+    Every conv is followed by BatchNorm and an in-place ReLU. The output has
+    ``n1x1 + n3x3 + n5x5 + pool_planes`` channels.
+    '''
+
+    @staticmethod
+    def _conv_bn_relu(in_planes, out_planes, **conv_kwargs):
+        '''Return the [Conv2d, BatchNorm2d, ReLU] layers of one conv unit.'''
+        return [
+            nn.Conv2d(in_planes, out_planes, **conv_kwargs),
+            nn.BatchNorm2d(out_planes),
+            nn.ReLU(True),
+        ]
+
+    def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
+        super(Inception, self).__init__()
+        unit = self._conv_bn_relu
+
+        # 1x1 conv branch
+        self.b1 = nn.Sequential(*unit(in_planes, n1x1, kernel_size=1))
+
+        # 1x1 conv -> 3x3 conv branch
+        self.b2 = nn.Sequential(*(
+            unit(in_planes, n3x3red, kernel_size=1)
+            + unit(n3x3red, n3x3, kernel_size=3, padding=1)
+        ))
+
+        # 1x1 conv -> two stacked 3x3 convs (the "5x5" branch)
+        self.b3 = nn.Sequential(*(
+            unit(in_planes, n5x5red, kernel_size=1)
+            + unit(n5x5red, n5x5, kernel_size=3, padding=1)
+            + unit(n5x5, n5x5, kernel_size=3, padding=1)
+        ))
+
+        # 3x3 pool -> 1x1 conv branch
+        self.b4 = nn.Sequential(*(
+            [nn.MaxPool2d(3, stride=1, padding=1)]
+            + unit(in_planes, pool_planes, kernel_size=1)
+        ))
+
+    def forward(self, x):
+        branches = [self.b1(x), self.b2(x), self.b3(x), self.b4(x)]
+        return torch.cat(branches, 1)
+
+
+class GoogLeNet(nn.Module):
+    '''GoogLeNet classifier: a conv stem, nine Inception modules and a 10-way linear head.'''
+
+    def __init__(self):
+        super(GoogLeNet, self).__init__()
+        stem_planes = 192
+        self.pre_layers = nn.Sequential(
+            nn.Conv2d(3, stem_planes, kernel_size=3, padding=1),
+            nn.BatchNorm2d(stem_planes),
+            nn.ReLU(True),
+        )
+
+        # (attribute name, Inception constructor arguments) for stage 3.
+        stage3 = [
+            ('a3', (192,  64,  96, 128, 16, 32, 32)),
+            ('b3', (256, 128, 128, 192, 32, 96, 64)),
+        ]
+        for name, args in stage3:
+            setattr(self, name, Inception(*args))
+
+        # One pooling module, applied after both stage 3 and stage 4.
+        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
+
+        # Stages 4 and 5.
+        later_stages = [
+            ('a4', (480, 192,  96, 208, 16,  48,  64)),
+            ('b4', (512, 160, 112, 224, 24,  64,  64)),
+            ('c4', (512, 128, 128, 256, 24,  64,  64)),
+            ('d4', (512, 112, 144, 288, 32,  64,  64)),
+            ('e4', (528, 256, 160, 320, 32, 128, 128)),
+            ('a5', (832, 256, 160, 320, 32, 128, 128)),
+            ('b5', (832, 384, 192, 384, 48, 128, 128)),
+        ]
+        for name, args in later_stages:
+            setattr(self, name, Inception(*args))
+
+        self.avgpool = nn.AvgPool2d(8, stride=1)
+        self.linear = nn.Linear(1024, 10)
+
+    def forward(self, x):
+        pipeline = (
+            self.pre_layers,
+            self.a3, self.b3, self.maxpool,
+            self.a4, self.b4, self.c4, self.d4, self.e4, self.maxpool,
+            self.a5, self.b5, self.avgpool,
+        )
+        out = x
+        for stage in pipeline:
+            out = stage(out)
+        flat = out.view(out.size(0), -1)
+        return self.linear(flat)
+
+
+def test():
+    '''Smoke test: run one random 3x32x32 image through GoogLeNet and print the output size.'''
+    model = GoogLeNet()
+    sample = torch.randn(1,3,32,32)
+    logits = model(sample)
+    print(logits.size())
+
+# test()
--- a/models/Old/lenet.py
+++ b/models/Old/lenet.py
@ -0,0 +1,23 @@
+'''LeNet in PyTorch.'''
+import torch.nn as nn
+import torch.nn.functional as F
+
+class LeNet(nn.Module):
+    '''LeNet: two 5x5 conv + ReLU + 2x2 max-pool stages, then three linear layers.
+
+    ``fc1`` expects 16*5*5 flattened features, which is what a 3x32x32 input
+    produces after the two conv/pool stages.
+    '''
+
+    def __init__(self):
+        super(LeNet, self).__init__()
+        self.conv1 = nn.Conv2d(3, 6, 5)
+        self.conv2 = nn.Conv2d(6, 16, 5)
+        self.fc1 = nn.Linear(16*5*5, 120)
+        self.fc2 = nn.Linear(120, 84)
+        self.fc3 = nn.Linear(84, 10)
+
+    def forward(self, x):
+        feats = F.max_pool2d(F.relu(self.conv1(x)), 2)
+        feats = F.max_pool2d(F.relu(self.conv2(feats)), 2)
+        flat = feats.view(feats.size(0), -1)
+        hidden = F.relu(self.fc1(flat))
+        hidden = F.relu(self.fc2(hidden))
+        return self.fc3(hidden)
--- a/models/Old/mobilenet.py
+++ b/models/Old/mobilenet.py
@ -0,0 +1,61 @@
+'''MobileNet in PyTorch.
+
+See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications"
+for more details.
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Block(nn.Module):
+    '''Depthwise conv + Pointwise conv
+
+    A 3x3 depthwise conv (one group per input channel, carrying the stride)
+    followed by a 1x1 conv, each with BatchNorm and ReLU.
+    '''
+
+    def __init__(self, in_planes, out_planes, stride=1):
+        super(Block, self).__init__()
+        self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride,
+                               padding=1, groups=in_planes, bias=False)
+        self.bn1 = nn.BatchNorm2d(in_planes)
+        self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1,
+                               padding=0, bias=False)
+        self.bn2 = nn.BatchNorm2d(out_planes)
+
+    def forward(self, x):
+        depthwise = F.relu(self.bn1(self.conv1(x)))
+        return F.relu(self.bn2(self.conv2(depthwise)))
+
+
+class MobileNet(nn.Module):
+    '''MobileNet classifier: a 3x3 conv stem, a stack of depthwise-separable blocks, and a linear head.'''
+
+    # (128,2) means conv planes=128, conv stride=2, by default conv stride=1
+    cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]
+
+    def __init__(self, num_classes=10):
+        super(MobileNet, self).__init__()
+        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(32)
+        self.layers = self._make_layers(in_planes=32)
+        self.linear = nn.Linear(1024, num_classes)
+
+    def _make_layers(self, in_planes):
+        '''Turn ``cfg`` into a Sequential of blocks, chaining channel counts.'''
+        blocks = []
+        for entry in self.cfg:
+            if isinstance(entry, int):
+                out_planes, stride = entry, 1
+            else:
+                out_planes, stride = entry[0], entry[1]
+            blocks.append(Block(in_planes, out_planes, stride))
+            in_planes = out_planes
+        return nn.Sequential(*blocks)
+
+    def forward(self, x):
+        stem = F.relu(self.bn1(self.conv1(x)))
+        feats = F.avg_pool2d(self.layers(stem), 2)
+        flat = feats.view(feats.size(0), -1)
+        return self.linear(flat)
+
+
+def test():
+    '''Smoke test: run one random 3x32x32 image through MobileNet and print the output size.'''
+    model = MobileNet()
+    sample = torch.randn(1,3,32,32)
+    logits = model(sample)
+    print(logits.size())
+
+# test()
--- a/models/Old/mobilenetv2.py
+++ b/models/Old/mobilenetv2.py
@ -0,0 +1,86 @@
+'''MobileNetV2 in PyTorch.
+
+See the paper "Inverted Residuals and Linear Bottlenecks:
+Mobile Networks for Classification, Detection and Segmentation" for more details.
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Block(nn.Module):
+    '''expand + depthwise + pointwise
+
+    When ``stride`` is 1 the shortcut is added to the output; the shortcut is
+    a 1x1 conv + BatchNorm if the channel count changes, otherwise an empty
+    Sequential. When ``stride`` is not 1 no shortcut is added.
+    '''
+
+    def __init__(self, in_planes, out_planes, expansion, stride):
+        super(Block, self).__init__()
+        self.stride = stride
+
+        expanded = expansion * in_planes
+        self.conv1 = nn.Conv2d(in_planes, expanded, kernel_size=1, stride=1, padding=0, bias=False)
+        self.bn1 = nn.BatchNorm2d(expanded)
+        self.conv2 = nn.Conv2d(expanded, expanded, kernel_size=3, stride=stride, padding=1, groups=expanded, bias=False)
+        self.bn2 = nn.BatchNorm2d(expanded)
+        self.conv3 = nn.Conv2d(expanded, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
+        self.bn3 = nn.BatchNorm2d(out_planes)
+
+        projection = []
+        if stride == 1 and in_planes != out_planes:
+            projection = [
+                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
+                nn.BatchNorm2d(out_planes),
+            ]
+        self.shortcut = nn.Sequential(*projection)
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = F.relu(self.bn2(self.conv2(out)))
+        out = self.bn3(self.conv3(out))
+        if self.stride == 1:
+            out = out + self.shortcut(x)
+        return out
+
+
+class MobileNetV2(nn.Module):
+    '''MobileNetV2 classifier: conv stem, inverted-residual stages from ``cfg``, 1x1 conv to 1280 channels, linear head.'''
+
+    # (expansion, out_planes, num_blocks, stride)
+    cfg = [(1,  16, 1, 1),
+           (6,  24, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
+           (6,  32, 3, 2),
+           (6,  64, 4, 2),
+           (6,  96, 3, 1),
+           (6, 160, 3, 2),
+           (6, 320, 1, 1)]
+
+    def __init__(self, num_classes=10):
+        super(MobileNetV2, self).__init__()
+        # NOTE: change conv1 stride 2 -> 1 for CIFAR10
+        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(32)
+        self.layers = self._make_layers(in_planes=32)
+        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
+        self.bn2 = nn.BatchNorm2d(1280)
+        self.linear = nn.Linear(1280, num_classes)
+
+    def _make_layers(self, in_planes):
+        '''Expand ``cfg`` into a Sequential; only the first block of each stage uses the stage stride.'''
+        blocks = []
+        for expansion, out_planes, num_blocks, stage_stride in self.cfg:
+            for block_stride in [stage_stride] + [1]*(num_blocks-1):
+                blocks.append(Block(in_planes, out_planes, expansion, block_stride))
+                in_planes = out_planes
+        return nn.Sequential(*blocks)
+
+    def forward(self, x):
+        stem = F.relu(self.bn1(self.conv1(x)))
+        feats = self.layers(stem)
+        feats = F.relu(self.bn2(self.conv2(feats)))
+        # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
+        pooled = F.avg_pool2d(feats, 4)
+        flat = pooled.view(pooled.size(0), -1)
+        return self.linear(flat)
+
+
+def test():
+    '''Smoke test: run two random 3x32x32 images through MobileNetV2 and print the output size.'''
+    model = MobileNetV2()
+    batch = torch.randn(2,3,32,32)
+    logits = model(batch)
+    print(logits.size())
+
+# test()
--- a/models/Old/mylenet.py
+++ b/models/Old/mylenet.py
@ -0,0 +1,71 @@
+'''LeNet in PyTorch.'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class MyLeNet(nn.Module):
+    '''LeNet whose 2x2 pooling picks one random element per window while training.
+
+    In eval mode (or with ``stoch=False``) ``forward`` uses ordinary average
+    pooling instead.
+    '''
+
+    def __init__(self):
+        super(MyLeNet, self).__init__()
+        self.conv1 = nn.Conv2d(3, 6, 5)
+        self.conv2 = nn.Conv2d(6, 16, 5)
+        self.fc1   = nn.Linear(16*5*5, 120)
+        self.fc2   = nn.Linear(120, 84)
+        self.fc3   = nn.Linear(84, 10)
+
+    @staticmethod
+    def _random_window_index(h, w, size, device):
+        '''Return (rows, cols) index grids selecting one random cell per ``size`` x ``size`` window.
+
+        Both grids have shape ``(h // size, w // size)``. Integer division is
+        used so the sizes stay ints on Python 3.
+        '''
+        out_h, out_w = h // size, w // size
+        row_start = (torch.arange(out_h, device=device) * size).view(-1, 1)
+        col_start = (torch.arange(out_w, device=device) * size).view(1, -1)
+        rows = torch.randint(size, (out_h, out_w), device=device) + row_start
+        cols = torch.randint(size, (out_h, out_w), device=device) + col_start
+        return rows, cols
+
+    def savg_pool2d(self,x,size):
+        '''Stochastic pooling: sample one element from every ``size`` x ``size`` window.
+
+        The same window positions are used for every batch entry and channel.
+        Returns a tensor of shape ``(b, c, h // size, w // size)``.
+        '''
+        b,c,h,w = x.shape
+        rows, cols = self._random_window_index(h, w, size, x.device)
+        # Index grids broadcast over the batch and channel dims.
+        return x[:, :, rows, cols]
+
+    def ssoftmax_pool2d(self,x,size,idx):
+        '''Like ``savg_pool2d`` but with the width taken from ``wdataset[idx]``.
+
+        NOTE(review): ``wdataset`` is not defined anywhere in this file, so
+        this method raises NameError unless a global of that name is provided
+        elsewhere. Its intended contents cannot be determined from here.
+        '''
+        b,c,h,w = x.shape
+        w = wdataset[idx]
+        rows, cols = self._random_window_index(h, w, size, x.device)
+        return x[:, :, rows, cols]
+
+    def mavg_pool2d(self,x,size):
+        '''Deterministic subsampling: keep the top-left element of every ``size`` x ``size`` window.'''
+        # The step used to be hard-coded to 2, ignoring ``size``.
+        return x[:, :, 0::size, 0::size]
+
+    def _pool(self, out, stoch):
+        '''Apply 2x2 stochastic pooling if ``stoch`` is set, else 2x2 average pooling.'''
+        if stoch:
+            return self.savg_pool2d(out, 2)
+        return F.avg_pool2d(out, 2)
+
+    def forward(self, x, stoch=True):
+        # Stochastic pooling is a training-time behaviour only.
+        stoch = bool(stoch) and self.training
+        out = self._pool(F.relu(self.conv1(x)), stoch)
+        out = self._pool(F.relu(self.conv2(out)), stoch)
+        out = out.view(out.size(0), -1)
+        out = F.relu(self.fc1(out))
+        out = F.relu(self.fc2(out))
+        out = self.fc3(out)
+        return out
+    
--- a/models/Old/mylenet2.py
+++ b/models/Old/mylenet2.py
@ -0,0 +1,123 @@
+'''LeNet in PyTorch.'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class MyLeNet2(nn.Module):
+    '''Wider LeNet whose convolutions are computed by hand via unfold + matmul.
+
+    ``forward`` uses ``myconv2d`` with the weights of ``conv1``/``conv2``.
+    The first pooling stage is stochastic in training mode; the second is
+    always average pooling. ``myconv2d_avg`` fuses convolution with stochastic
+    pooling and is currently not called by ``forward``.
+    '''
+
+    def __init__(self):
+        super(MyLeNet2, self).__init__()
+        self.conv1 = nn.Conv2d(3, 60, 5)
+        self.conv2 = nn.Conv2d(60, 160, 5)
+        self.fc1   = nn.Linear(160*5*5, 120)
+        self.fc2   = nn.Linear(120, 84)
+        self.fc3   = nn.Linear(84, 10)
+
+    @staticmethod
+    def _pair(value):
+        '''Expand an int to ``(value, value)``; pass 2-sequences through as a tuple.'''
+        if isinstance(value, int):
+            return (value, value)
+        return tuple(value)
+
+    def _conv_output_hw(self, in_h, in_w, kh, kw, stride, padding, dilation):
+        '''Spatial output size of a convolution, using the formula documented for ``Unfold``.'''
+        (sh, sw), (ph, pw), (dh, dw) = self._pair(stride), self._pair(padding), self._pair(dilation)
+        out_h = (in_h + 2 * ph - dh * (kh - 1) - 1) // sh + 1
+        out_w = (in_w + 2 * pw - dw * (kw - 1) - 1) // sw + 1
+        return out_h, out_w
+
+    @staticmethod
+    def _random_window_index(h, w, size, device):
+        '''Return (rows, cols) index grids selecting one random cell per ``size`` x ``size`` window.'''
+        out_h, out_w = h // size, w // size
+        row_start = (torch.arange(out_h, device=device) * size).view(-1, 1)
+        col_start = (torch.arange(out_w, device=device) * size).view(1, -1)
+        rows = torch.randint(size, (out_h, out_w), device=device) + row_start
+        cols = torch.randint(size, (out_h, out_w), device=device) + col_start
+        return rows, cols
+
+    @staticmethod
+    def _apply_kernel(patches, weight, bias):
+        '''Multiply unfolded patches ``(B, C*kh*kw, L)`` by the flattened kernel; returns ``(B, out_channels, L)``.'''
+        flat_weight = weight.view(weight.size(0), -1).t()
+        out = patches.transpose(1, 2).matmul(flat_weight)
+        if bias is not None:
+            out = out + bias
+        return out.transpose(1, 2)
+
+    # Vanilla Convolution
+    def myconv2d(self, input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
+        '''2-D convolution implemented with unfold + matmul.
+
+        Args:
+            input: tensor of shape ``(B, C_in, H, W)``.
+            weight: tensor of shape ``(C_out, C_in, kh, kw)``.
+            bias: optional tensor of shape ``(C_out,)``.
+            stride, padding, dilation: int or pair, forwarded to unfold.
+            groups: only ``1`` is supported.
+
+        Returns:
+            Tensor of shape ``(B, C_out, H_out, W_out)``.
+
+        Raises:
+            NotImplementedError: if ``groups`` is not 1.
+        '''
+        if groups != 1:
+            raise NotImplementedError('myconv2d only supports groups=1')
+        batch_size, _, in_h, in_w = input.shape
+        out_channels, _, kh, kw = weight.shape
+        # Integer arithmetic that honours stride/padding/dilation; the old
+        # ``in_h-2*(int(kh)/2)`` produced floats on Python 3 and ignored them.
+        out_h, out_w = self._conv_output_hw(in_h, in_w, kh, kw, stride, padding, dilation)
+
+        patches = F.unfold(input, kernel_size=(kh, kw), dilation=dilation, padding=padding, stride=stride)
+        out_unf = self._apply_kernel(patches, weight, bias)
+        return out_unf.reshape(batch_size, out_channels, out_h, out_w)
+
+    def myconv2d_avg(self, input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1,size=2):
+        '''Convolution evaluated only at one random position per ``size`` x ``size`` output window.
+
+        Equivalent to running ``myconv2d`` and then sampling one element per
+        window, but the kernel is applied only to the sampled patches.
+        Returns a contiguous tensor of shape
+        ``(B, C_out, H_out // size, W_out // size)``.
+
+        Raises:
+            NotImplementedError: if ``groups`` is not 1.
+        '''
+        if groups != 1:
+            raise NotImplementedError('myconv2d_avg only supports groups=1')
+        batch_size, _, in_h, in_w = input.shape
+        out_channels, _, kh, kw = weight.shape
+        out_h, out_w = self._conv_output_hw(in_h, in_w, kh, kw, stride, padding, dilation)
+
+        patches = F.unfold(input, kernel_size=(kh, kw), dilation=dilation, padding=padding, stride=stride)
+        patches = patches.view(batch_size, -1, out_h, out_w)
+        # Row offsets must vary along the row axis; they used to be broadcast
+        # along the column axis.
+        rows, cols = self._random_window_index(out_h, out_w, size, input.device)
+        pooled_h, pooled_w = rows.shape
+        patches = patches[:, :, rows, cols].reshape(batch_size, -1, pooled_h * pooled_w)
+
+        out_unf = self._apply_kernel(patches, weight, bias)
+        return out_unf.reshape(batch_size, out_channels, pooled_h, pooled_w).contiguous()
+
+    def savg_pool2d(self,x,size):
+        '''Stochastic pooling: sample one element from every ``size`` x ``size`` window.
+
+        The same window positions are used for every batch entry and channel.
+        Returns a tensor of shape ``(b, c, h // size, w // size)``.
+        '''
+        b,c,h,w = x.shape
+        rows, cols = self._random_window_index(h, w, size, x.device)
+        return x[:, :, rows, cols]
+
+    def ssoftmax_pool2d(self,x,size,idx):
+        '''Like ``savg_pool2d`` but with the width taken from ``wdataset[idx]``.
+
+        NOTE(review): ``wdataset`` is not defined anywhere in this file, so
+        this method raises NameError unless a global of that name is provided
+        elsewhere. Its intended contents cannot be determined from here.
+        '''
+        b,c,h,w = x.shape
+        w = wdataset[idx]
+        rows, cols = self._random_window_index(h, w, size, x.device)
+        return x[:, :, rows, cols]
+
+    def mavg_pool2d(self,x,size):
+        '''Deterministic subsampling: keep the top-left element of every ``size`` x ``size`` window.'''
+        # The step used to be hard-coded to 2, ignoring ``size``.
+        return x[:, :, 0::size, 0::size]
+
+    def forward(self, x, stoch=True):
+        # Stochastic pooling is a training-time behaviour only.
+        stoch = bool(stoch) and self.training
+        out = F.relu(self.myconv2d(x, self.conv1.weight, bias=self.conv1.bias))
+        if stoch:
+            out = self.savg_pool2d(out, 2)
+        else:
+            out = F.avg_pool2d(out, 2)
+        # The second stage always uses average pooling.
+        out = F.relu(self.myconv2d(out, self.conv2.weight, bias=self.conv2.bias))
+        out = F.avg_pool2d(out, 2)
+        out = out.view(out.size(0), -1 )
+        out = F.relu(self.fc1(out))
+        out = F.relu(self.fc2(out))
+        out = self.fc3(out)
+        return out
+    
--- a/models/Old/mylenet3.py
+++ b/models/Old/mylenet3.py
@ -0,0 +1,238 @@
+'''LeNet in PyTorch.'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import math
+
+from .sconv2davg import SConv2dAvg
+
+class MyLeNetNormal(nn.Module):#epoch 12s
+    """LeNet-style CNN: three conv layers, each followed by ReLU and average pooling.
+
+    For a 32x32 input the three stages shrink the feature map to 1x1, which
+    matches the 800 input features expected by ``fc1``.
+    """
+    def __init__(self):
+        super(MyLeNetNormal, self).__init__()
+        self.conv1 = nn.Conv2d(3, 200, kernel_size=5, stride=1)
+        self.conv2 = nn.Conv2d(200, 400, kernel_size=3, stride=1)
+        self.conv3 = nn.Conv2d(400, 800, kernel_size=3, stride=1)
+        self.fc1 = nn.Linear(800, 10)
+
+    def forward(self, x, stoch=True):
+        """Return the ``fc1`` output for ``x``; ``stoch`` is accepted but ignored."""
+        _, _, _h, _w = x.shape  # raises unless x has exactly four dimensions
+        out = x
+        stages = ((self.conv1, 2), (self.conv2, 2), (self.conv3, 4))
+        for conv, pool in stages:
+            out = F.avg_pool2d(F.relu(conv(out)), pool, ceil_mode=True)
+
+        flat = out.view(out.size(0), -1)
+        return self.fc1(flat)
+
+def savg_pool2d(x,size,ceil_mode=False):
+    """Stochastic pooling: keep one randomly chosen element per pooling window.
+
+    The (h, w) plane of ``x`` is tiled with ``size`` x ``size`` windows and one
+    position is drawn for every window; the same positions are used for all
+    batch entries and channels.
+
+    Args:
+        x: 4-D tensor unpacked as (b, c, h, w).
+        size: window edge length (also the step between windows).
+        ceil_mode: if true, a trailing partial window is sampled as well
+            (output extent ``ceil(h/size)``); otherwise it is dropped
+            (output extent ``floor(h/size)``).
+
+    Returns:
+        Tensor of shape (b, c, out_h, out_w).
+    """
+    _, _, h, w = x.shape
+    device = x.device
+    if ceil_mode:
+        out_h, out_w = math.ceil(h/size), math.ceil(w/size)
+    else:
+        out_h, out_w = h//size, w//size
+
+    # First row / column of every window.  Built from the output extent so it
+    # always broadcasts against the (out_h, out_w) offsets, also when h or w
+    # is not a multiple of size.
+    row0 = (torch.arange(out_h, device=device)*size).view(-1,1)
+    col0 = torch.arange(out_w, device=device)*size
+
+    # Rows are drawn before columns, as before, so seeded runs on evenly
+    # divisible inputs give the same result.
+    selh = torch.randint(size,(out_h,out_w), device=device)
+    selw = torch.randint(size,(out_h,out_w), device=device)
+
+    # A partial border window (ceil_mode) holds fewer than ``size`` elements;
+    # wrap the offset into it so the index stays inside x.  Full windows are
+    # unaffected because their length equals ``size``.
+    selh = row0 + selh % (h - row0).clamp(max=size)
+    selw = col0 + selw % (w - col0).clamp(max=size)
+
+    return x[:,:, selh, selw]
+
+def savg_pool2d_(x,size,ceil_mode=False):
+    """Stochastic pooling over non-overlapping ``size`` x ``size`` windows.
+
+    One position is drawn per window and shared by every batch entry and
+    channel.  Trailing rows/columns that do not fill a whole window are
+    dropped.
+
+    Args:
+        x: 4-D tensor unpacked as (b, c, h, w).
+        size: window edge length.
+        ceil_mode: accepted for signature parity; it is not used here.
+
+    Returns:
+        Tensor of shape (b, c, h // size, w // size).
+    """
+    _, _, h, w = x.shape
+    device = x.device
+    # Integer division: the previous ``h/size`` produced floats, which
+    # ``view``/``repeat``/``arange`` reject on Python 3.
+    out_h, out_w = h//size, w//size
+
+    # Random in-window offsets; rows are drawn before columns.
+    selh = torch.randint(size,(out_h,out_w), device=device)
+    selw = torch.randint(size,(out_h,out_w), device=device)
+
+    # Window origins.  The column origins use the width (the old code shaped
+    # them with the row count, which only worked for square inputs).
+    rows = selh + (torch.arange(out_h, device=device)*size).view(-1,1)
+    cols = selw + torch.arange(out_w, device=device)*size
+
+    # Index tensors broadcast over the batch and channel slices, so no
+    # explicit meshgrid over (b, c) is needed.
+    return x[:,:, rows, cols]
+
+class MyLeNetSimNormal(nn.Module):#epoch 12s
+    """LeNet-style CNN whose pooling can be switched between average and stochastic."""
+    def __init__(self):
+        super(MyLeNetSimNormal, self).__init__()
+        self.conv1 = nn.Conv2d(3, 200, kernel_size=5, stride=1)
+        self.conv2 = nn.Conv2d(200, 400, kernel_size=3, stride=1)
+        self.conv3 = nn.Conv2d(400, 800, kernel_size=3, stride=1)
+        self.fc1 = nn.Linear(800, 10)
+
+    def forward(self, x, stoch=True):
+        """Return the ``fc1`` output for ``x``.
+
+        Each conv + ReLU is followed by ``savg_pool2d`` when ``stoch`` is
+        true and by ``F.avg_pool2d`` otherwise (window 2, 2, 4; ceil mode).
+        """
+        out = x
+        for conv, window in ((self.conv1, 2), (self.conv2, 2), (self.conv3, 4)):
+            out = F.relu(conv(out))
+            if stoch:
+                out = savg_pool2d(out, window, ceil_mode=True)
+            else:
+                out = F.avg_pool2d(out, window, ceil_mode=True)
+
+        flat = out.view(out.size(0), -1)
+        return self.fc1(flat)
+
+
+class MyLeNetStride(nn.Module):#epoch 6s
+    """LeNet-style CNN that down-samples with strided convolutions instead of pooling."""
+    def __init__(self):
+        super(MyLeNetStride, self).__init__()
+        self.conv1 = nn.Conv2d(3, 200, kernel_size=5, stride=2)
+        self.conv2 = nn.Conv2d(200, 400, kernel_size=3, stride=2)
+        self.conv3 = nn.Conv2d(400, 800, kernel_size=3, stride=4)
+        self.fc1 = nn.Linear(800, 10)
+
+    def forward(self, x, stoch=True):
+        """Return the ``fc1`` output for ``x``; ``stoch`` is accepted but ignored."""
+        out = x
+        for conv in (self.conv1, self.conv2, self.conv3):
+            out = F.relu(conv(out))
+
+        flat = out.view(out.size(0), -1)
+        return self.fc1(flat)
+
+class MyLeNetMatNormal(nn.Module):#epoch 21s
+    """LeNet-style CNN built from ``SConv2dAvg`` layers with average pooling.
+
+    After every forward pass ``self.comp`` holds the sum of the three layers'
+    ``comp(h, w)`` values divided by 1e6.
+    """
+    def __init__(self):
+        super(MyLeNetMatNormal, self).__init__()
+        self.conv1 = SConv2dAvg(3, 200, 5, stride=1)
+        self.conv2 = SConv2dAvg(200, 400, 3, stride=1)
+        self.conv3 = SConv2dAvg(400, 800, 3, stride=1)
+        self.fc1 = nn.Linear(800, 10)
+
+    def forward(self, x, stoch=True):
+        """Return the ``fc1`` output for ``x``; ``stoch`` is accepted but ignored."""
+        convs = (self.conv1, self.conv2, self.conv3)
+        windows = (2, 2, 4)
+
+        # Record the spatial size each conv layer sees as its input.
+        input_sizes = []
+        out = x
+        for conv, window in zip(convs, windows):
+            _, _, h, w = out.shape
+            input_sizes.append((h, w))
+            out = F.avg_pool2d(F.relu(conv(out)), window, ceil_mode=True)
+
+        out = self.fc1(out.view(out.size(0), -1))
+
+        # Computation estimate, accumulated in layer order.
+        comp = 0
+        for conv, (h, w) in zip(convs, input_sizes):
+            comp += conv.comp(h, w)
+        self.comp = comp/1000000
+        return out
+
+
+class MyLeNetMatStoch(nn.Module):#epoch 17s
+    """LeNet-style CNN built from strided ``SConv2dAvg`` layers (no separate pooling).
+
+    After every forward pass ``self.comp`` holds the sum of the three layers'
+    ``comp(h, w)`` values divided by 1e6.
+    """
+    def __init__(self):
+        super(MyLeNetMatStoch, self).__init__()
+        self.conv1 = SConv2dAvg(3, 200, 5, stride=2, ceil_mode=True)
+        self.conv2 = SConv2dAvg(200, 400, 3, stride=2, ceil_mode=True)
+        self.conv3 = SConv2dAvg(400, 800, 3, stride=4, ceil_mode=True)
+        self.fc1 = nn.Linear(800, 10)
+
+    def forward(self, x, stoch=True):
+        """Return the ``fc1`` output; ``stoch`` is forwarded to every conv layer."""
+        convs = (self.conv1, self.conv2, self.conv3)
+
+        # Record the spatial size each conv layer sees as its input.
+        input_sizes = []
+        out = x
+        for conv in convs:
+            _, _, h, w = out.shape
+            input_sizes.append((h, w))
+            out = F.relu(conv(out, stoch=stoch))
+
+        out = self.fc1(out.view(out.size(0), -1))
+
+        # Computation estimate, accumulated in layer order.
+        comp = 0
+        for conv, (h, w) in zip(convs, input_sizes):
+            comp += conv.comp(h, w)
+        self.comp = comp/1000000
+        return out
+    
+class MyLeNetMatStochBU(nn.Module):#epoch 11s
+    """LeNet-style CNN of ``SConv2dAvg`` layers with selections sampled last-layer-first.
+
+    After every forward pass ``self.comp`` holds the sum of the three layers'
+    ``comp(h, w, mask)`` values (``.item()``) divided by 1e6.
+    """
+    def __init__(self):
+        super(MyLeNetMatStochBU, self).__init__()
+        self.conv1 = SConv2dAvg(3, 200, 5, stride=2, ceil_mode=True)
+        self.conv2 = SConv2dAvg(200, 400, 3, stride=2, ceil_mode=True)
+        self.conv3 = SConv2dAvg(400, 800, 3, stride=4, ceil_mode=True)
+        self.fc1 = nn.Linear(800, 10)
+
+    def forward(self, x, stoch=True):
+        """Return the ``fc1`` output for ``x``.
+
+        With ``stoch`` true the conv layers receive the pre-sampled
+        selections and masks; otherwise each conv runs with ``stride=1`` and
+        is followed by ``F.avg_pool2d``.  The sampling calls are made in both
+        cases.
+        """
+        # Spatial sizes along the network, obtained from each layer's get_size.
+        h0, w0 = x.size(2), x.size(3)
+        h1, w1 = self.conv1.get_size(h0, w0)
+        h2, w2 = self.conv2.get_size(h1, w1)
+        h3, w3 = self.conv3.get_size(h2, w2)
+
+        # Sample from the last layer back to the first: the third value
+        # returned by each ``sample`` call is passed as ``mask`` to the layer
+        # before it.
+        mask3 = torch.ones((h3, w3), device=x.device)
+        selh3, selw3, mask2 = self.conv3.sample(h2, w2, mask=mask3)
+        selh2, selw2, mask1 = self.conv2.sample(h1, w1, mask=mask2)
+        selh1, selw1, _mask0 = self.conv1.sample(h0, w0, mask=mask1)
+
+        out = x
+        if stoch:
+            plan = (
+                (self.conv1, selh1, selw1, mask1),
+                (self.conv2, selh2, selw2, mask2),
+                (self.conv3, selh3, selw3, mask3),
+            )
+            for conv, selh, selw, mask in plan:
+                out = F.relu(conv(out, selh, selw, mask, stoch=stoch))
+        else:
+            for conv, window in ((self.conv1, 2), (self.conv2, 2), (self.conv3, 4)):
+                out = F.relu(conv(out, stoch=True, stride=1))
+                out = F.avg_pool2d(out, window, ceil_mode=True)
+
+        out = self.fc1(out.view(out.size(0), -1))
+
+        # Computation estimate, accumulated in layer order.
+        comp = 0
+        comp += self.conv1.comp(h0, w0, mask1)
+        comp += self.conv2.comp(h1, w1, mask2)
+        comp += self.conv3.comp(h2, w2, mask3)
+        self.comp = comp.item()/1000000
+        return out
+
--- a/models/Old/myresnet.py
+++ b/models/Old/myresnet.py
@ -0,0 +1,159 @@
+'''ResNet in PyTorch.
+
+For Pre-activation ResNet, see 'preact_resnet.py'.
+
+Reference:
+[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+    Deep Residual Learning for Image Recognition. arXiv:1512.03385
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class BasicBlock(nn.Module):
+    """Residual block of two 3x3 conv + batch-norm layers."""
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(BasicBlock, self).__init__()
+        out_planes = self.expansion*planes
+
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+
+        # Identity shortcut unless the stride or the channel count changes,
+        # in which case a strided 1x1 conv + batch-norm projects the input.
+        if stride == 1 and in_planes == out_planes:
+            self.shortcut = nn.Sequential()
+        else:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, out_planes, kernel_size=1,
+                          stride=stride, bias=False),
+                nn.BatchNorm2d(out_planes),
+            )
+
+    def forward(self, x):
+        """Return ``relu(main_path(x) + shortcut(x))``."""
+        y = self.bn1(self.conv1(x))
+        y = self.bn2(self.conv2(F.relu(y)))
+        y += self.shortcut(x)
+        return F.relu(y)
+
+
+class Bottleneck(nn.Module):
+    """Residual bottleneck block: 1x1 reduce, 3x3, 1x1 expand (x4)."""
+    expansion = 4
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(Bottleneck, self).__init__()
+        out_planes = self.expansion*planes
+
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(out_planes)
+
+        # Identity shortcut unless the stride or the channel count changes,
+        # in which case a strided 1x1 conv + batch-norm projects the input.
+        if stride == 1 and in_planes == out_planes:
+            self.shortcut = nn.Sequential()
+        else:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, out_planes, kernel_size=1,
+                          stride=stride, bias=False),
+                nn.BatchNorm2d(out_planes),
+            )
+
+    def forward(self, x):
+        """Return ``relu(main_path(x) + shortcut(x))``."""
+        y = F.relu(self.bn1(self.conv1(x)))
+        y = F.relu(self.bn2(self.conv2(y)))
+        y = self.bn3(self.conv3(y))
+        y += self.shortcut(x)
+        return F.relu(y)
+
+
+class ResNet(nn.Module):
+    """ResNet for small images with an optional stochastic final pooling.
+
+    Args:
+        block: residual block class exposing an ``expansion`` attribute and
+            constructed as ``block(in_planes, planes, stride)``.
+        num_blocks: sequence of four block counts, one per stage.
+        num_classes: size of the classifier output.
+        stoch: enables the stochastic pooling path in ``forward``.
+    """
+    def __init__(self, block, num_blocks, num_classes=10,stoch=False):
+        super(ResNet, self).__init__()
+        self.in_planes = 64
+
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
+        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
+        self.linear = nn.Linear(512*block.expansion, num_classes)
+        self.stoch = stoch
+
+    def _make_layer(self, block, planes, num_blocks, stride):
+        """Build one stage; only its first block uses ``stride``."""
+        strides = [stride] + [1]*(num_blocks-1)
+        layers = []
+        for stride in strides:
+            layers.append(block(self.in_planes, planes, stride))
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*layers)
+
+    def savg_pool2d(self,x,size,locx=-1,locy=-1):
+        """Keep one element per ``size`` x ``size`` window of ``x``.
+
+        Args:
+            x: 4-D tensor unpacked as (b, c, h, w).
+            size: window edge length; trailing rows/columns that do not fill
+                a window are dropped.
+            locx: fixed row offset inside every window, or -1 to draw it at
+                random per window.
+            locy: fixed column offset inside every window, or -1 to draw it
+                at random per window.
+
+        Returns:
+            Tensor of shape (b, c, h // size, w // size); the same positions
+            are used for every batch entry and channel.
+        """
+        _, _, h, w = x.shape
+        device = x.device
+        # Integer division: the former ``h/size`` is a float on Python 3.
+        out_h, out_w = h//size, w//size
+
+        # The old body tested an undefined name ``loc``; the two offsets are
+        # controlled by ``locx`` and ``locy`` respectively.
+        if locx == -1:
+            selh = torch.randint(size, (out_h, out_w), device=device)
+        else:
+            selh = torch.ones((out_h, out_w), dtype=torch.long, device=device)*locx
+        if locy == -1:
+            selw = torch.randint(size, (out_h, out_w), device=device)
+        else:
+            selw = torch.ones((out_h, out_w), dtype=torch.long, device=device)*locy
+
+        # Add the window origins; rows vary down the grid, columns across it.
+        rows = selh + (torch.arange(out_h, device=device)*size).view(-1, 1)
+        cols = selw + torch.arange(out_w, device=device)*size
+        return x[:, :, rows, cols]
+
+    def forward(self, x ,stoch = True):
+        """Return class scores for ``x``.
+
+        Stochastic pooling is used only when both the constructor flag and
+        the ``stoch`` argument are true; every other combination uses
+        ``F.avg_pool2d``.
+        """
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        out = self.layer4(out)
+        # Previously nothing was pooled when self.stoch was set but stoch was
+        # false, which left the wrong feature count for ``linear``.
+        if self.stoch and stoch:
+            out = self.savg_pool2d(out, 4)
+        else:
+            out = F.avg_pool2d(out, 4)
+        out = out.view(out.size(0), -1)
+        out = self.linear(out)
+        return out
+
+
+def MyResNet18(stoch=False):
+    """Build a ``ResNet`` of ``BasicBlock`` stages (2, 2, 2, 2); ``stoch`` is passed through."""
+    return ResNet(BasicBlock, num_blocks=[2, 2, 2, 2], stoch=stoch)
+
+
+def ResNet34():
+    """Build a ``ResNet`` of ``BasicBlock`` stages (3, 4, 6, 3)."""
+    return ResNet(BasicBlock, num_blocks=[3, 4, 6, 3])
+
+
+def MyResNet50():
+    """Build a ``ResNet`` of ``Bottleneck`` stages (3, 4, 6, 3)."""
+    return ResNet(Bottleneck, num_blocks=[3, 4, 6, 3])
+
+
+def ResNet101():
+    """Build a ``ResNet`` of ``Bottleneck`` stages (3, 4, 23, 3)."""
+    return ResNet(Bottleneck, num_blocks=[3, 4, 23, 3])
+
+
+def ResNet152():
+    """Build a ``ResNet`` of ``Bottleneck`` stages (3, 8, 36, 3)."""
+    return ResNet(Bottleneck, num_blocks=[3, 8, 36, 3])
+
+
+def test():
+    """Smoke test: run the 18-layer network on one random 32x32 image and print the output size."""
+    # This module names its factory ``MyResNet18``; ``ResNet18`` is undefined.
+    net = MyResNet18()
+    y = net(torch.randn(1, 3, 32, 32))
+    print(y.size())
+
+# test()
--- a/models/Old/myresnet2.py
+++ b/models/Old/myresnet2.py
@ -0,0 +1,187 @@
+'''ResNet in PyTorch.
+
+For Pre-activation ResNet, see 'preact_resnet.py'.
+
+Reference:
+[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+    Deep Residual Learning for Image Recognition. arXiv:1512.03385
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class BasicBlock(nn.Module):
+    """Residual block of two 3x3 conv + batch-norm layers."""
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(BasicBlock, self).__init__()
+        out_planes = self.expansion*planes
+
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+
+        # A projection (strided 1x1 conv + batch-norm) is only needed when
+        # the block changes the stride or the channel count.
+        needs_projection = stride != 1 or in_planes != out_planes
+        if needs_projection:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, out_planes, kernel_size=1,
+                          stride=stride, bias=False),
+                nn.BatchNorm2d(out_planes),
+            )
+        else:
+            self.shortcut = nn.Sequential()
+
+    def forward(self, x):
+        """Return ``relu(main_path(x) + shortcut(x))``."""
+        y = self.bn1(self.conv1(x))
+        y = self.bn2(self.conv2(F.relu(y)))
+        y += self.shortcut(x)
+        return F.relu(y)
+
+
+class Bottleneck(nn.Module):
+    """Residual bottleneck block: 1x1 reduce, 3x3, 1x1 expand (x4)."""
+    expansion = 4
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(Bottleneck, self).__init__()
+        out_planes = self.expansion*planes
+
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(out_planes)
+
+        # A projection (strided 1x1 conv + batch-norm) is only needed when
+        # the block changes the stride or the channel count.
+        needs_projection = stride != 1 or in_planes != out_planes
+        if needs_projection:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, out_planes, kernel_size=1,
+                          stride=stride, bias=False),
+                nn.BatchNorm2d(out_planes),
+            )
+        else:
+            self.shortcut = nn.Sequential()
+
+    def forward(self, x):
+        """Return ``relu(main_path(x) + shortcut(x))``."""
+        y = F.relu(self.bn1(self.conv1(x)))
+        y = F.relu(self.bn2(self.conv2(y)))
+        y = self.bn3(self.conv3(y))
+        y += self.shortcut(x)
+        return F.relu(y)
+
+
+class ResNet(nn.Module):
+    """ResNet for small images with an extra 3x3 conv before the classifier.
+
+    The extra conv (``conv2``) is either evaluated at one random position per
+    4x4 window (stochastic path) or everywhere and then average-pooled.
+
+    Args:
+        block: residual block class exposing an ``expansion`` attribute and
+            constructed as ``block(in_planes, planes, stride)``.
+        num_blocks: sequence of four block counts, one per stage.
+        num_classes: size of the classifier output.
+        stoch: enables the stochastic path in ``forward``.
+    """
+    def __init__(self, block, num_blocks, num_classes=10,stoch=False):
+        super(ResNet, self).__init__()
+        self.in_planes = 64
+        # Channel count produced by layer4; equals 512 for expansion == 1.
+        feat_planes = 512*block.expansion
+
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        # Sized from the block expansion so it also fits Bottleneck stages
+        # (a fixed 512 did not match their 2048-channel output).
+        self.conv2 = nn.Conv2d(feat_planes, feat_planes, kernel_size=3,
+                               stride=1, padding=1, bias=True)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
+        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
+        self.linear = nn.Linear(feat_planes, num_classes)
+        self.stoch = stoch
+
+    def _make_layer(self, block, planes, num_blocks, stride):
+        """Build one stage; only its first block uses ``stride``."""
+        strides = [stride] + [1]*(num_blocks-1)
+        layers = []
+        for stride in strides:
+            layers.append(block(self.in_planes, planes, stride))
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*layers)
+
+    def myconv2d_avg(self, input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1,size=2):
+        """Convolve ``input`` but only at one random position per pooling window.
+
+        The convolution output grid is tiled with ``size`` x ``size`` windows
+        and a single position is drawn in each; only those positions are
+        computed.  With ``size=1`` this is an ordinary convolution.
+
+        Args:
+            input: 4-D tensor (batch, channels, h, w).
+            weight: conv weight (out_channels, in_channels, kh, kw).
+            bias: optional per-output-channel bias.
+            stride, padding, dilation: forwarded to the unfold step.
+            groups: accepted for signature parity; grouped convolution is not
+                implemented here.
+            size: pooling window edge length.
+
+        Returns:
+            Tensor (batch, out_channels, out_h // size, out_w // size).
+        """
+        batch_size, _, in_h, in_w = input.shape
+        out_channels, _, kh, kw = weight.shape
+        # Standard conv output extent (integer arithmetic; the former
+        # expression used ``/`` and yielded floats on Python 3).
+        out_h = (in_h + 2*padding - dilation*(kh - 1) - 1)//stride + 1
+        out_w = (in_w + 2*padding - dilation*(kw - 1) - 1)//stride + 1
+        pool_h, pool_w = out_h//size, out_w//size
+        device = input.device
+
+        # Patches as columns, laid out on the conv output grid.
+        patches = F.unfold(input, kernel_size=(kh, kw), dilation=dilation,
+                           padding=padding, stride=stride)
+        patches = patches.view(batch_size, -1, out_h, out_w)
+
+        # Random in-window offsets plus window origins.  The row origins
+        # vary down the grid (the old code broadcast them across columns,
+        # which sampled from the wrong windows).
+        sel_h = torch.randint(size, (pool_h, pool_w), device=device)
+        sel_w = torch.randint(size, (pool_h, pool_w), device=device)
+        rows = sel_h + (torch.arange(pool_h, device=device)*size).view(-1, 1)
+        cols = sel_w + torch.arange(pool_w, device=device)*size
+        patches = patches[:, :, rows, cols].reshape(batch_size, -1, pool_h*pool_w)
+
+        # Convolution as a matrix product over the selected patches.
+        out = patches.transpose(1, 2).matmul(weight.view(out_channels, -1).t())
+        if bias is not None:
+            out = out + bias
+        out = out.transpose(1, 2).reshape(batch_size, out_channels, pool_h, pool_w)
+        return out.contiguous()
+
+
+    def savg_pool2d(self,x,size,locx=-1,locy=-1):
+        """Keep one element per ``size`` x ``size`` window of ``x``.
+
+        Args:
+            x: 4-D tensor unpacked as (b, c, h, w).
+            size: window edge length; trailing rows/columns that do not fill
+                a window are dropped.
+            locx: fixed row offset inside every window, or -1 to draw it at
+                random per window.
+            locy: fixed column offset inside every window, or -1 to draw it
+                at random per window.
+
+        Returns:
+            Tensor of shape (b, c, h // size, w // size).
+        """
+        _, _, h, w = x.shape
+        device = x.device
+        out_h, out_w = h//size, w//size
+
+        # The fixed-offset branches used an undefined name ``loc``; they now
+        # use the offset that guards them.
+        if locx == -1:
+            selh = torch.randint(size, (out_h, out_w), device=device)
+        else:
+            selh = torch.ones((out_h, out_w), dtype=torch.long, device=device)*locx
+        if locy == -1:
+            selw = torch.randint(size, (out_h, out_w), device=device)
+        else:
+            selw = torch.ones((out_h, out_w), dtype=torch.long, device=device)*locy
+
+        rows = selh + (torch.arange(out_h, device=device)*size).view(-1, 1)
+        cols = selw + torch.arange(out_w, device=device)*size
+        return x[:, :, rows, cols]
+
+    def forward(self, x ,stoch = True):
+        """Return class scores for ``x``.
+
+        The stochastic path is taken only when both the constructor flag and
+        the ``stoch`` argument are true.
+        """
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        out = self.layer4(out)
+        if self.stoch and stoch:
+            out = F.relu(self.myconv2d_avg(out, self.conv2.weight, bias=self.conv2.bias,padding=1,size=4))
+        else:
+            # Full convolution (what myconv2d_avg computes for size=1),
+            # followed by regular average pooling.
+            out = F.relu(self.conv2(out))
+            out = F.avg_pool2d(out, 4)
+        out = out.view(out.size(0), -1)
+        out = self.linear(out)
+        return out
+
+
+def MyResNet18(stoch=False):
+    """Build a ``ResNet`` of ``BasicBlock`` stages (2, 2, 2, 2); ``stoch`` is passed through."""
+    return ResNet(BasicBlock, num_blocks=[2, 2, 2, 2], stoch=stoch)
+
+
+def ResNet34():
+    """Build a ``ResNet`` of ``BasicBlock`` stages (3, 4, 6, 3)."""
+    return ResNet(BasicBlock, num_blocks=[3, 4, 6, 3])
+
+
+def MyResNet50():
+    """Build a ``ResNet`` of ``Bottleneck`` stages (3, 4, 6, 3)."""
+    return ResNet(Bottleneck, num_blocks=[3, 4, 6, 3])
+
+
+def ResNet101():
+    """Build a ``ResNet`` of ``Bottleneck`` stages (3, 4, 23, 3)."""
+    return ResNet(Bottleneck, num_blocks=[3, 4, 23, 3])
+
+
+def ResNet152():
+    """Build a ``ResNet`` of ``Bottleneck`` stages (3, 8, 36, 3)."""
+    return ResNet(Bottleneck, num_blocks=[3, 8, 36, 3])
+
+
+def test():
+    """Smoke test: run the 18-layer network on one random 32x32 image and print the output size."""
+    # This module names its factory ``MyResNet18``; ``ResNet18`` is undefined.
+    net = MyResNet18()
+    y = net(torch.randn(1, 3, 32, 32))
+    print(y.size())
+
+# test()
--- a/models/Old/pnasnet.py
+++ b/models/Old/pnasnet.py
@ -0,0 +1,125 @@
+'''PNASNet in PyTorch.
+
+Paper: Progressive Neural Architecture Search
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class SepConv(nn.Module):
+    '''Grouped convolution (one group per input channel) followed by batch-norm.'''
+    def __init__(self, in_planes, out_planes, kernel_size, stride):
+        super(SepConv, self).__init__()
+        # "Same"-style padding for odd kernel sizes.
+        pad = (kernel_size-1)//2
+        self.conv1 = nn.Conv2d(in_planes, out_planes,
+                               kernel_size=kernel_size,
+                               stride=stride,
+                               padding=pad,
+                               groups=in_planes,
+                               bias=False)
+        self.bn1 = nn.BatchNorm2d(out_planes)
+
+    def forward(self, x):
+        '''Return ``bn1(conv1(x))``.'''
+        y = self.conv1(x)
+        return self.bn1(y)
+
+
+class CellA(nn.Module):
+    '''Cell that sums a 7x7 separable conv branch and a 3x3 max-pool branch.'''
+    def __init__(self, in_planes, out_planes, stride=1):
+        super(CellA, self).__init__()
+        self.stride = stride
+        self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
+        # The pooled branch gets a 1x1 conv + batch-norm only in stride-2 cells.
+        if stride == 2:
+            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1,
+                                   stride=1, padding=0, bias=False)
+            self.bn1 = nn.BatchNorm2d(out_planes)
+
+    def forward(self, x):
+        '''Return ``relu(sep_conv1(x) + pooled_branch(x))``.'''
+        conv_branch = self.sep_conv1(x)
+        pool_branch = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
+        if self.stride == 2:
+            pool_branch = self.bn1(self.conv1(pool_branch))
+        return F.relu(conv_branch + pool_branch)
+
+class CellB(nn.Module):
+    '''Two-branch cell whose branch outputs are concatenated and reduced by a 1x1 conv.'''
+    def __init__(self, in_planes, out_planes, stride=1):
+        super(CellB, self).__init__()
+        self.stride = stride
+        # Left branch: 7x7 and 3x3 separable convs.
+        self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
+        self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride)
+        # Right branch: 5x5 separable conv (plus max-pool in forward).
+        self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride)
+        # The pooled path gets a 1x1 conv + batch-norm only in stride-2 cells.
+        if stride == 2:
+            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1,
+                                   stride=1, padding=0, bias=False)
+            self.bn1 = nn.BatchNorm2d(out_planes)
+        # 1x1 conv that brings the concatenated branches back to out_planes.
+        self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1,
+                               stride=1, padding=0, bias=False)
+        self.bn2 = nn.BatchNorm2d(out_planes)
+
+    def forward(self, x):
+        '''Return ``relu(bn2(conv2(cat(left, right))))``.'''
+        left = F.relu(self.sep_conv1(x) + self.sep_conv2(x))
+
+        pooled = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
+        if self.stride == 2:
+            pooled = self.bn1(self.conv1(pooled))
+        right = F.relu(pooled + self.sep_conv3(x))
+
+        merged = torch.cat([left, right], 1)
+        return F.relu(self.bn2(self.conv2(merged)))
+
+class PNASNet(nn.Module):
+    '''PNASNet classifier: three stages of cells separated by two stride-2 cells.
+
+    Args:
+        cell_type: cell class, constructed as
+            ``cell_type(in_planes, out_planes, stride=...)``.
+        num_cells: number of cells in each of the three stages.
+        num_planes: channel count of the first stage; it doubles at each
+            down-sampling cell.
+    '''
+    def __init__(self, cell_type, num_cells, num_planes):
+        super(PNASNet, self).__init__()
+        self.in_planes = num_planes
+        self.cell_type = cell_type
+
+        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(num_planes)
+
+        # Use the caller's ``num_cells``; it used to be ignored in favour of
+        # a hard-coded 6 (the value the factories in this file pass anyway).
+        self.layer1 = self._make_layer(num_planes, num_cells=num_cells)
+        self.layer2 = self._downsample(num_planes*2)
+        self.layer3 = self._make_layer(num_planes*2, num_cells=num_cells)
+        self.layer4 = self._downsample(num_planes*4)
+        self.layer5 = self._make_layer(num_planes*4, num_cells=num_cells)
+
+        self.linear = nn.Linear(num_planes*4, 10)
+
+    def _make_layer(self, planes, num_cells):
+        '''Stack ``num_cells`` stride-1 cells and update ``self.in_planes``.'''
+        layers = []
+        for _ in range(num_cells):
+            layers.append(self.cell_type(self.in_planes, planes, stride=1))
+            self.in_planes = planes
+        return nn.Sequential(*layers)
+
+    def _downsample(self, planes):
+        '''Create one stride-2 cell and update ``self.in_planes``.'''
+        layer = self.cell_type(self.in_planes, planes, stride=2)
+        self.in_planes = planes
+        return layer
+
+    def forward(self, x):
+        '''Return class scores; the 8x8 average pool matches 32x32 inputs.'''
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        out = self.layer4(out)
+        out = self.layer5(out)
+        out = F.avg_pool2d(out, 8)
+        out = self.linear(out.view(out.size(0), -1))
+        return out
+
+
+def PNASNetA():
+    '''Build a ``PNASNet`` of ``CellA`` cells with 44 base planes.'''
+    return PNASNet(cell_type=CellA, num_cells=6, num_planes=44)
+
+def PNASNetB():
+    '''Build a ``PNASNet`` of ``CellB`` cells with 32 base planes.'''
+    return PNASNet(cell_type=CellB, num_cells=6, num_planes=32)
+
+
+def test():
+    '''Smoke test: print the ``PNASNetB`` output for one random 32x32 image.'''
+    # The network is built before the input is drawn, as before.
+    model = PNASNetB()
+    sample = torch.randn(1, 3, 32, 32)
+    print(model(sample))
+
+# test()
--- a/models/Old/preact_resnet.py
+++ b/models/Old/preact_resnet.py
@ -0,0 +1,118 @@
+'''Pre-activation ResNet in PyTorch.
+
+Reference:
+[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+    Identity Mappings in Deep Residual Networks. arXiv:1603.05027
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class PreActBlock(nn.Module):
+    '''Basic residual block with batch-norm and ReLU applied before each conv.'''
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(PreActBlock, self).__init__()
+        out_planes = self.expansion*planes
+
+        self.bn1 = nn.BatchNorm2d(in_planes)
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+
+        # ``shortcut`` only exists when a projection is required; forward
+        # falls back to the raw input otherwise.
+        if not (stride == 1 and in_planes == out_planes):
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, out_planes, kernel_size=1,
+                          stride=stride, bias=False)
+            )
+
+    def forward(self, x):
+        '''Return ``conv path + shortcut``; the projection reads the pre-activated input.'''
+        pre = F.relu(self.bn1(x))
+        if hasattr(self, 'shortcut'):
+            skip = self.shortcut(pre)
+        else:
+            skip = x
+        y = self.conv1(pre)
+        y = self.conv2(F.relu(self.bn2(y)))
+        y += skip
+        return y
+
+
+class PreActBottleneck(nn.Module):
+    '''Bottleneck residual block with batch-norm and ReLU applied before each conv.'''
+    expansion = 4
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(PreActBottleneck, self).__init__()
+        out_planes = self.expansion*planes
+
+        self.bn1 = nn.BatchNorm2d(in_planes)
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(planes)
+        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
+
+        # ``shortcut`` only exists when a projection is required; forward
+        # falls back to the raw input otherwise.
+        if not (stride == 1 and in_planes == out_planes):
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, out_planes, kernel_size=1,
+                          stride=stride, bias=False)
+            )
+
+    def forward(self, x):
+        '''Return ``conv path + shortcut``; the projection reads the pre-activated input.'''
+        pre = F.relu(self.bn1(x))
+        if hasattr(self, 'shortcut'):
+            skip = self.shortcut(pre)
+        else:
+            skip = x
+        y = self.conv1(pre)
+        y = self.conv2(F.relu(self.bn2(y)))
+        y = self.conv3(F.relu(self.bn3(y)))
+        y += skip
+        return y
+
+
+class PreActResNet(nn.Module):
+    '''Pre-activation ResNet: a stem conv, four stages of blocks, pooling and a linear head.'''
+    def __init__(self, block, num_blocks, num_classes=10):
+        super(PreActResNet, self).__init__()
+        self.in_planes = 64
+
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+        # (planes, first-block stride) per stage, in construction order.
+        n1, n2, n3, n4 = num_blocks[0], num_blocks[1], num_blocks[2], num_blocks[3]
+        self.layer1 = self._make_layer(block, 64, n1, stride=1)
+        self.layer2 = self._make_layer(block, 128, n2, stride=2)
+        self.layer3 = self._make_layer(block, 256, n3, stride=2)
+        self.layer4 = self._make_layer(block, 512, n4, stride=2)
+        self.linear = nn.Linear(512*block.expansion, num_classes)
+
+    def _make_layer(self, block, planes, num_blocks, stride):
+        '''Build one stage; only its first block uses ``stride``.'''
+        blocks = []
+        for block_stride in [stride] + [1]*(num_blocks-1):
+            blocks.append(block(self.in_planes, planes, block_stride))
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*blocks)
+
+    def forward(self, x):
+        '''Return class scores for ``x``.'''
+        out = self.conv1(x)
+        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
+            out = stage(out)
+        out = F.avg_pool2d(out, 4)
+        flat = out.view(out.size(0), -1)
+        return self.linear(flat)
+
+
+def PreActResNet18():
+    '''Build a ``PreActResNet`` of ``PreActBlock`` stages (2, 2, 2, 2).'''
+    return PreActResNet(PreActBlock, num_blocks=[2, 2, 2, 2])
+
+def PreActResNet34():
+    '''Build a ``PreActResNet`` of ``PreActBlock`` stages (3, 4, 6, 3).'''
+    return PreActResNet(PreActBlock, num_blocks=[3, 4, 6, 3])
+
+def PreActResNet50():
+    '''Build a ``PreActResNet`` of ``PreActBottleneck`` stages (3, 4, 6, 3).'''
+    return PreActResNet(PreActBottleneck, num_blocks=[3, 4, 6, 3])
+
+def PreActResNet101():
+    '''Build a ``PreActResNet`` of ``PreActBottleneck`` stages (3, 4, 23, 3).'''
+    return PreActResNet(PreActBottleneck, num_blocks=[3, 4, 23, 3])
+
+def PreActResNet152():
+    '''Build a ``PreActResNet`` of ``PreActBottleneck`` stages (3, 8, 36, 3).'''
+    return PreActResNet(PreActBottleneck, num_blocks=[3, 8, 36, 3])
+
+
+def test():
+    '''Smoke test: print the ``PreActResNet18`` output size for one random 32x32 image.'''
+    # The network is built before the input is drawn, as before.
+    model = PreActResNet18()
+    sample = torch.randn(1, 3, 32, 32)
+    print(model(sample).size())
+
+# test()
--- a/models/Old/regnet.py
+++ b/models/Old/regnet.py
@ -0,0 +1,155 @@
+'''RegNet in PyTorch.
+
+Paper: "Designing Network Design Spaces".
+
+Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class SE(nn.Module):
+    '''Squeeze-and-Excitation block.
+
+    Rescales each channel of the input by a sigmoid gate computed from the
+    globally average-pooled input through two 1x1 convolutions.
+    '''
+
+    def __init__(self, in_planes, se_planes):
+        super().__init__()
+        # in_planes -> se_planes -> in_planes, both with bias.
+        self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True)
+        self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True)
+
+    def forward(self, x):
+        squeezed = F.adaptive_avg_pool2d(x, (1, 1))
+        hidden = F.relu(self.se1(squeezed))
+        gate = self.se2(hidden).sigmoid()
+        return x * gate
+
+
+class Block(nn.Module):
+    '''RegNet residual block: 1x1 conv -> grouped 3x3 conv -> optional SE -> 1x1 conv, plus shortcut.
+
+    Args:
+        w_in: input channels.
+        w_out: output channels.
+        stride: stride of the 3x3 conv (and of the projection shortcut).
+        group_width: channels per group in the 3x3 conv.
+        bottleneck_ratio: inner width as a multiple of w_out.
+        se_ratio: SE hidden width as a multiple of w_in; values <= 0 disable SE.
+    '''
+
+    def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio):
+        super().__init__()
+        inner = int(round(w_out * bottleneck_ratio))
+
+        # 1x1 reduce/expand to the inner width
+        self.conv1 = nn.Conv2d(w_in, inner, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(inner)
+
+        # grouped 3x3
+        self.conv2 = nn.Conv2d(inner, inner, kernel_size=3, stride=stride,
+                               padding=1, groups=inner // group_width, bias=False)
+        self.bn2 = nn.BatchNorm2d(inner)
+
+        # optional squeeze-and-excitation on the inner features
+        self.with_se = se_ratio > 0
+        if self.with_se:
+            self.se = SE(inner, int(round(w_in * se_ratio)))
+
+        # 1x1 projection to the output width
+        self.conv3 = nn.Conv2d(inner, w_out, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(w_out)
+
+        # Identity shortcut unless the shape changes.
+        needs_projection = stride != 1 or w_in != w_out
+        if needs_projection:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(w_in, w_out, kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(w_out),
+            )
+        else:
+            self.shortcut = nn.Sequential()
+
+    def forward(self, x):
+        out = self.conv1(x)
+        out = F.relu(self.bn1(out))
+        out = self.conv2(out)
+        out = F.relu(self.bn2(out))
+        if self.with_se:
+            out = self.se(out)
+        out = self.bn3(self.conv3(out))
+        return F.relu(out + self.shortcut(x))
+
+
+class RegNet(nn.Module):
+    '''RegNet classifier: 3x3 stem conv + BN, four stages of Block, global pooling, linear head.
+
+    Args:
+        cfg: dict with per-stage lists 'depths', 'widths', 'strides' and scalars
+            'group_width', 'bottleneck_ratio', 'se_ratio'.
+        num_classes: size of the classifier output.
+    '''
+
+    def __init__(self, cfg, num_classes=10):
+        super().__init__()
+        self.cfg = cfg
+        # Channel count fed to the next block; advanced by _make_layer.
+        self.in_planes = 64
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.layer1 = self._make_layer(0)
+        self.layer2 = self._make_layer(1)
+        self.layer3 = self._make_layer(2)
+        self.layer4 = self._make_layer(3)
+        final_width = self.cfg['widths'][-1]
+        self.linear = nn.Linear(final_width, num_classes)
+
+    def _make_layer(self, idx):
+        '''Build stage `idx` from cfg; only the first block of the stage is strided.'''
+        cfg = self.cfg
+        depth = cfg['depths'][idx]
+        width = cfg['widths'][idx]
+        stage_stride = cfg['strides'][idx]
+        group_width = cfg['group_width']
+        bottleneck_ratio = cfg['bottleneck_ratio']
+        se_ratio = cfg['se_ratio']
+
+        blocks = []
+        for position in range(depth):
+            block_stride = stage_stride if position == 0 else 1
+            blocks.append(Block(self.in_planes, width, block_stride,
+                                group_width, bottleneck_ratio, se_ratio))
+            self.in_planes = width
+        return nn.Sequential(*blocks)
+
+    def forward(self, x):
+        '''Return the classifier output for input batch `x`.'''
+        feats = F.relu(self.bn1(self.conv1(x)))
+        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
+            feats = stage(feats)
+        pooled = F.adaptive_avg_pool2d(feats, (1, 1))
+        flat = pooled.view(pooled.size(0), -1)
+        return self.linear(flat)
+
+
+def RegNetX_200MF():
+    '''Build RegNet with the RegNetX-200MF settings (group width 8, SE disabled).'''
+    return RegNet({
+        'depths': [1, 1, 4, 7],
+        'widths': [24, 56, 152, 368],
+        'strides': [1, 1, 2, 2],
+        'group_width': 8,
+        'bottleneck_ratio': 1,
+        'se_ratio': 0,
+    })
+
+
+def RegNetX_400MF():
+    '''Build RegNet with the RegNetX-400MF settings (group width 16, SE disabled).'''
+    return RegNet({
+        'depths': [1, 2, 7, 12],
+        'widths': [32, 64, 160, 384],
+        'strides': [1, 1, 2, 2],
+        'group_width': 16,
+        'bottleneck_ratio': 1,
+        'se_ratio': 0,
+    })
+
+
+def RegNetY_400MF():
+    '''Build RegNet with the RegNetY-400MF settings (group width 16, SE ratio 0.25).'''
+    return RegNet({
+        'depths': [1, 2, 7, 12],
+        'widths': [32, 64, 160, 384],
+        'strides': [1, 1, 2, 2],
+        'group_width': 16,
+        'bottleneck_ratio': 1,
+        'se_ratio': 0.25,
+    })
+
+
+def test():
+    '''Smoke test: print RegNetX_200MF and the output shape for a random 2x3x32x32 batch.'''
+    model = RegNetX_200MF()
+    print(model)
+    batch = torch.randn(2, 3, 32, 32)
+    logits = model(batch)
+    print(logits.shape)
+
+
+# Run the smoke test only when this file is executed as a script.
+if __name__ == '__main__':
+    test()
--- a/models/Old/resnet.py
+++ b/models/Old/resnet.py
@ -0,0 +1,132 @@
+'''ResNet in PyTorch.
+
+For Pre-activation ResNet, see 'preact_resnet.py'.
+
+Reference:
+[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+    Deep Residual Learning for Image Recognition. arXiv:1512.03385
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class BasicBlock(nn.Module):
+    '''Two 3x3 conv + BN layers with a residual shortcut and a final ReLU.
+
+    Args:
+        in_planes: input channels.
+        planes: output channels (times `expansion`, which is 1 here).
+        stride: stride of the first conv and of the projection shortcut.
+    '''
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1):
+        super().__init__()
+        out_planes = self.expansion * planes
+
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+
+        # Identity shortcut unless resolution or width changes.
+        if stride != 1 or in_planes != out_planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(out_planes),
+            )
+        else:
+            self.shortcut = nn.Sequential()
+
+    def forward(self, x):
+        out = self.conv1(x)
+        out = F.relu(self.bn1(out))
+        out = self.bn2(self.conv2(out))
+        return F.relu(out + self.shortcut(x))
+
+
+class Bottleneck(nn.Module):
+    '''1x1 -> 3x3 -> 1x1 conv bottleneck with a residual shortcut and a final ReLU.
+
+    Args:
+        in_planes: input channels.
+        planes: inner width; the block outputs `expansion * planes` channels.
+        stride: stride of the 3x3 conv and of the projection shortcut.
+    '''
+    expansion = 4
+
+    def __init__(self, in_planes, planes, stride=1):
+        super().__init__()
+        out_planes = self.expansion * planes
+
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(out_planes)
+
+        # Identity shortcut unless resolution or width changes.
+        if stride != 1 or in_planes != out_planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(out_planes),
+            )
+        else:
+            self.shortcut = nn.Sequential()
+
+    def forward(self, x):
+        out = self.conv1(x)
+        out = F.relu(self.bn1(out))
+        out = self.conv2(out)
+        out = F.relu(self.bn2(out))
+        out = self.bn3(self.conv3(out))
+        return F.relu(out + self.shortcut(x))
+
+
+class ResNet(nn.Module):
+    '''ResNet: 3x3 stem conv + BN, four residual stages, 4x4 average pooling, linear classifier.
+
+    Args:
+        block: residual block class, called as block(in_planes, planes, stride);
+            must expose an `expansion` class attribute.
+        num_blocks: per-stage block counts (indices 0..3 are read).
+        num_classes: size of the classifier output.
+    '''
+
+    def __init__(self, block, num_blocks, num_classes=10):
+        super().__init__()
+        # Channel count fed to the next block; advanced by _make_layer.
+        self.in_planes = 64
+
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        first, second, third, fourth = num_blocks[0], num_blocks[1], num_blocks[2], num_blocks[3]
+        self.layer1 = self._make_layer(block, 64, first, stride=1)
+        self.layer2 = self._make_layer(block, 128, second, stride=2)
+        self.layer3 = self._make_layer(block, 256, third, stride=2)
+        self.layer4 = self._make_layer(block, 512, fourth, stride=2)
+        self.linear = nn.Linear(512 * block.expansion, num_classes)
+
+    def _make_layer(self, block, planes, num_blocks, stride):
+        '''Stack blocks into one stage; only the first block uses `stride`.'''
+        per_block_strides = [stride] + [1] * (num_blocks - 1)
+        stage = []
+        for block_stride in per_block_strides:
+            stage.append(block(self.in_planes, planes, block_stride))
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*stage)
+
+    def forward(self, x):
+        '''Return the classifier output for input batch `x`.'''
+        feats = F.relu(self.bn1(self.conv1(x)))
+        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
+            feats = stage(feats)
+        pooled = F.avg_pool2d(feats, 4)
+        flat = pooled.view(pooled.size(0), -1)
+        return self.linear(flat)
+
+
+def ResNet18():
+    '''Build a ResNet from BasicBlock with stage depths [2, 2, 2, 2].'''
+    stage_depths = [2, 2, 2, 2]
+    return ResNet(BasicBlock, stage_depths)
+
+
+def ResNet34():
+    '''Build a ResNet from BasicBlock with stage depths [3, 4, 6, 3].'''
+    stage_depths = [3, 4, 6, 3]
+    return ResNet(BasicBlock, stage_depths)
+
+
+def ResNet50():
+    '''Build a ResNet from Bottleneck with stage depths [3, 4, 6, 3].'''
+    stage_depths = [3, 4, 6, 3]
+    return ResNet(Bottleneck, stage_depths)
+
+
+def ResNet101():
+    '''Build a ResNet from Bottleneck with stage depths [3, 4, 23, 3].'''
+    stage_depths = [3, 4, 23, 3]
+    return ResNet(Bottleneck, stage_depths)
+
+
+def ResNet152():
+    '''Build a ResNet from Bottleneck with stage depths [3, 8, 36, 3].'''
+    stage_depths = [3, 8, 36, 3]
+    return ResNet(Bottleneck, stage_depths)
+
+
+def test():
+    '''Smoke test: run ResNet18 on one random 3x32x32 input and print the output size.'''
+    model = ResNet18()
+    sample = torch.randn(1, 3, 32, 32)
+    logits = model(sample)
+    print(logits.size())
+
+# test()
--- a/models/Old/resnext.py
+++ b/models/Old/resnext.py
@ -0,0 +1,95 @@
+'''ResNeXt in PyTorch.
+
+See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Block(nn.Module):
+    '''Grouped convolution block.
+
+    1x1 conv -> grouped 3x3 conv (`cardinality` groups) -> 1x1 conv, with a
+    residual shortcut and a final ReLU. Outputs
+    `expansion * cardinality * bottleneck_width` channels.
+    '''
+    expansion = 2
+
+    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
+        super().__init__()
+        inner = cardinality * bottleneck_width
+        out_planes = self.expansion * inner
+
+        self.conv1 = nn.Conv2d(in_planes, inner, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(inner)
+        self.conv2 = nn.Conv2d(inner, inner, kernel_size=3, stride=stride,
+                               padding=1, groups=cardinality, bias=False)
+        self.bn2 = nn.BatchNorm2d(inner)
+        self.conv3 = nn.Conv2d(inner, out_planes, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(out_planes)
+
+        # Identity shortcut unless resolution or width changes.
+        if stride != 1 or in_planes != out_planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(out_planes),
+            )
+        else:
+            self.shortcut = nn.Sequential()
+
+    def forward(self, x):
+        out = self.conv1(x)
+        out = F.relu(self.bn1(out))
+        out = self.conv2(out)
+        out = F.relu(self.bn2(out))
+        out = self.bn3(self.conv3(out))
+        return F.relu(out + self.shortcut(x))
+
+
+class ResNeXt(nn.Module):
+    '''ResNeXt with three stages of grouped-convolution Blocks.
+
+    Args:
+        num_blocks: per-stage block counts (indices 0..2 are read).
+        cardinality: number of groups in each Block's 3x3 conv.
+        bottleneck_width: per-group width of the first stage; doubled after each stage.
+        num_classes: size of the classifier output.
+    '''
+
+    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
+        super().__init__()
+        self.cardinality = cardinality
+        # Mutated by _make_layer (doubled once per stage built).
+        self.bottleneck_width = bottleneck_width
+        self.in_planes = 64
+
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.layer1 = self._make_layer(num_blocks[0], 1)
+        self.layer2 = self._make_layer(num_blocks[1], 2)
+        self.layer3 = self._make_layer(num_blocks[2], 2)
+        # self.layer4 = self._make_layer(num_blocks[3], 2)
+        # Uses the constructor argument, not self.bottleneck_width (already doubled three times).
+        classifier_in = cardinality * bottleneck_width * 8
+        self.linear = nn.Linear(classifier_in, num_classes)
+
+    def _make_layer(self, num_blocks, stride):
+        '''Build one stage (first block strided), then double self.bottleneck_width.'''
+        per_block_strides = [stride] + [1] * (num_blocks - 1)
+        stage = []
+        for block_stride in per_block_strides:
+            stage.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, block_stride))
+            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
+        # Increase bottleneck_width by 2 after each stage.
+        self.bottleneck_width *= 2
+        return nn.Sequential(*stage)
+
+    def forward(self, x):
+        '''Return the classifier output for input batch `x`.'''
+        feats = F.relu(self.bn1(self.conv1(x)))
+        for stage in (self.layer1, self.layer2, self.layer3):
+            feats = stage(feats)
+        # out = self.layer4(out)
+        pooled = F.avg_pool2d(feats, 8)
+        flat = pooled.view(pooled.size(0), -1)
+        return self.linear(flat)
+
+
+def ResNeXt29_2x64d():
+    '''Build ResNeXt with 3 blocks per stage, cardinality 2 and bottleneck width 64.'''
+    stage_depths = [3, 3, 3]
+    return ResNeXt(num_blocks=stage_depths, cardinality=2, bottleneck_width=64)
+
+def ResNeXt29_4x64d():
+    '''Build ResNeXt with 3 blocks per stage, cardinality 4 and bottleneck width 64.'''
+    stage_depths = [3, 3, 3]
+    return ResNeXt(num_blocks=stage_depths, cardinality=4, bottleneck_width=64)
+
+def ResNeXt29_8x64d():
+    '''Build ResNeXt with 3 blocks per stage, cardinality 8 and bottleneck width 64.'''
+    stage_depths = [3, 3, 3]
+    return ResNeXt(num_blocks=stage_depths, cardinality=8, bottleneck_width=64)
+
+def ResNeXt29_32x4d():
+    '''Build ResNeXt with 3 blocks per stage, cardinality 32 and bottleneck width 4.'''
+    stage_depths = [3, 3, 3]
+    return ResNeXt(num_blocks=stage_depths, cardinality=32, bottleneck_width=4)
+
+def test_resnext():
+    '''Smoke test: run ResNeXt29_2x64d on one random 3x32x32 input and print the output size.'''
+    model = ResNeXt29_2x64d()
+    sample = torch.randn(1, 3, 32, 32)
+    logits = model(sample)
+    print(logits.size())
+
+# test_resnext()
--- a/models/Old/sconv2davg.py
+++ b/models/Old/sconv2davg.py
@ -0,0 +1,140 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import math
+
+class SConv2dAvg(nn.Module):
+    '''Convolution followed by spatial reduction, computed as unfold + matmul.
+
+    With stoch=False the stride-1 convolution output is average-pooled with
+    `self.stride`. With stoch=True one position is kept per stride x stride
+    cell of the convolution output, chosen by the (selh, selw) offsets.
+
+    `deconv` and `pooldeconv` are all-ones transposed convolutions used only by
+    `sample` to spread a mask over the stride cell and the kernel footprint.
+
+    NOTE(review): `groups` is accepted but never used by this class.
+    NOTE(review): the stride != 1 path reshapes the unfolded input to
+    (in_h - kh + 1, in_w - kw + 1), which presumably requires padding=0 and
+    dilation=1 -- confirm before using other values.
+    '''
+
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1,ceil_mode=True):
+        super(SConv2dAvg, self).__init__()
+        # Temporary Conv2d, used only to obtain initialised weight/bias tensors.
+        conv = nn.Conv2d(in_channels, out_channels, kernel_size)
+        self.deconv = nn.ConvTranspose2d(1, 1, kernel_size, 1, padding=0, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
+        nn.init.constant_(self.deconv.weight, 1)
+        self.pooldeconv = nn.ConvTranspose2d(1, 1, kernel_size=stride,padding=0,stride=stride, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
+        nn.init.constant_(self.pooldeconv.weight, 1)
+        self.weight = nn.Parameter(conv.weight)
+        self.bias = nn.Parameter(conv.bias)
+        self.stride = stride
+        self.dilation = dilation
+        self.padding = padding
+        self.kernel_size = kernel_size
+        self.ceil_mode = ceil_mode
+
+    def forward(self, input, selh=-torch.ones(1,1), selw=-torch.ones(1,1), mask=-torch.ones(1,1),stoch=False,stride=-1):
+        '''Apply the convolution and the spatial reduction.
+
+        Args:
+            input: 4-D batch (batch, channels, height, width).
+            selh, selw: row/column offsets inside each stride cell, one per
+                output position; the default (value -1 at [0,0]) means
+                "draw them at random".
+            mask: output positions to compute (> 0); the default (value -1 at
+                [0,0]) means "compute every position".
+            stoch: False -> full convolution then average pooling;
+                True -> keep one sampled position per stride cell.
+            stride: stride override; -1 means use self.stride.
+
+        Returns:
+            The reduced feature map (batch, out_channels, out_h, out_w).
+        '''
+        device=input.device
+        if stride==-1:
+            stride = self.stride
+        #stoch=True
+        if stoch==False:
+            stride=1 #test with real average pooling
+        batch_size, in_channels, in_h, in_w = input.shape
+        out_channels, in_channels, kh, kw =  self.weight.shape
+
+        afterconv_h = in_h-(kh-1) #size after conv
+        afterconv_w = in_w-(kw-1)
+        if self.ceil_mode:
+            out_h = math.ceil(afterconv_h/stride)
+            out_w = math.ceil(afterconv_w/stride)
+        else:
+            out_h = math.floor(afterconv_h/stride)
+            out_w = math.floor(afterconv_w/stride)
+        unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=self.dilation, padding=self.padding, stride=1)
+        inp_unf = unfold(input)
+        if stride!=1:
+            inp_unf = inp_unf.view(batch_size,in_channels*kh*kw,afterconv_h,afterconv_w)
+            if selh[0,0]==-1:
+                # Number of rows/columns by which the last cell overhangs the conv output.
+                resth = (out_h*stride)-afterconv_h
+                restw = (out_w*stride)-afterconv_w
+                selh = torch.randint(stride,(out_h,out_w), device=device)
+                selw = torch.randint(stride,(out_h,out_w), device=device)
+                # print(selh.shape)
+                # NOTE(review): only resth is tested here although restw is used too.
+                if resth!=0:
+                    # Case: (stride-resth)=0 ?
+                    selh[-1,:]=selh[-1,:]%(stride-resth);selh[:,-1]=selh[:,-1]%(stride-restw)
+                    selw[-1,:]=selw[-1,:]%(stride-resth);selw[:,-1]=selw[:,-1]%(stride-restw)
+            # Absolute row/column index of the sampled position of every cell.
+            rng_h = selh + torch.arange(0,out_h*stride,stride,device=device).view(-1,1)
+            rng_w = selw + torch.arange(0,out_w*stride,stride,device=device)
+
+            if mask[0,0]==-1:
+                inp_unf = inp_unf[:,:,rng_h,rng_w].view(batch_size,in_channels*kh*kw,-1)
+            else:
+                inp_unf = inp_unf[:,:,rng_h[mask>0],rng_w[mask>0]]
+
+        #Matrix mul
+        if self.bias is None:
+            out_unf = inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2)
+        else:
+            out_unf = (inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()) + self.bias).transpose(1, 2)
+
+        if stride==1 or mask[0,0]==-1:
+            out = out_unf.view(batch_size,out_channels,out_h,out_w) #Fold
+            if stoch==False:
+                out = F.avg_pool2d(out,self.stride,ceil_mode=True)
+        else:
+            # Scatter the computed positions into a zero-filled output.
+            out = torch.zeros(batch_size, out_channels,out_h,out_w,device=device)
+            out[:,:,mask>0] = out_unf
+        return out
+
+    def comp(self,h,w,mask=-torch.ones(1,1)):
+        '''Return weight.numel() times the number of computed output positions.
+
+        Without a mask the position count is out_h * out_w derived from (h, w);
+        with a mask it is the number of entries > 0.
+        Arithmetic on self.kernel_size assumes it is an int, not a tuple.
+        '''
+        out_h = (h-(self.kernel_size))/self.stride
+        out_w = (w-(self.kernel_size))/self.stride
+        if self.ceil_mode:
+            out_h = math.ceil(out_h)
+            out_w = math.ceil(out_w)
+        else:
+            out_h = math.floor(out_h)
+            # Was `math.florr`, which raised AttributeError when ceil_mode=False.
+            out_w = math.floor(out_w)
+        if mask[0,0]==-1:
+            comp = self.weight.numel()*out_h*out_w
+        else:
+            comp = self.weight.numel()*(mask>0).sum()
+        return comp
+
+    def sample(self,h,w,mask):
+        '''
+            Draw random per-cell offsets and build the mask derived from them.
+
+            h, w : forward input shape
+            mask : mask of output used in computation
+
+            Returns (selh, selw, fmask): the drawn offsets and a 2-D long
+            tensor obtained by spreading the selected, masked positions over
+            the kernel footprint with `deconv`.
+        '''
+        stride = self.stride
+        out_channels, in_channels, kh, kw =  self.weight.shape
+        device=mask.device
+
+        afterconv_h = h-(kh-1) #Why afterconv ?
+        afterconv_w = w-(kw-1)
+        if self.ceil_mode:
+            out_h = math.ceil(afterconv_h/stride)
+            out_w = math.ceil(afterconv_w/stride)
+        else:
+            out_h = math.floor(afterconv_h/stride)
+            out_w = math.floor(afterconv_w/stride)
+        selh = torch.randint(stride,(out_h,out_w), device=device)
+        selw = torch.randint(stride,(out_h,out_w), device=device)
+
+        resth = (out_h*stride)-afterconv_h #simply equal to stride-1, isn't it ?
+        restw = (out_w*stride)-afterconv_w
+        # print('resth', resth, self.stride)
+        if resth!=0:
+            selh[-1,:]=selh[-1,:]%(stride-resth);selh[:,-1]=selh[:,-1]%(stride-restw)
+            selw[-1,:]=selw[-1,:]%(stride-resth);selw[:,-1]=selw[:,-1]%(stride-restw)
+        maskh = (out_h)*stride
+        maskw = (out_w)*stride
+        rng_h = selh + torch.arange(0,out_h*stride,stride,device=device).view(-1,1)
+        rng_w = selw + torch.arange(0,out_w*stride,stride,device=device)
+        # rng_w = selw + torch.arange(0,out_w*self.stride,self.stride,device=device).view(-1,1)
+        # 1 at the sampled position of every cell, 0 elsewhere.
+        nmask = torch.zeros((maskh,maskw),device=device)
+        nmask[rng_h,rng_w] = 1
+        #rmask = mask * nmask
+        # Upsample the output mask so each entry covers its whole stride cell.
+        dmask = self.pooldeconv(mask.float().view(1,1,mask.shape[0],mask.shape[1]))
+        rmask = nmask * dmask
+        #rmask = rmask[:,:,:out_h,:out_w]
+        fmask = self.deconv(rmask)
+        fmask = fmask[0,0]
+        return selh,selw,fmask.long()
+
+    def get_size(self,h,w):
+        '''Return (newh, neww) from the standard convolution output-size formula
+        using this layer's padding, dilation, kernel_size and stride.'''
+        # newh=(h-(self.kernel_size-1)+(self.stride-1))/self.stride
+        # neww=(w-(self.kernel_size-1)+(self.stride-1))/self.stride
+        # print(newh,neww)
+        newh=math.floor(((h + 2*self.padding - self.dilation*(self.kernel_size-1) - 1)/self.stride) + 1)
+        neww=math.floor(((w + 2*self.padding - self.dilation*(self.kernel_size-1) - 1)/self.stride) + 1)
+        return newh, neww
--- a/models/Old/senet.py
+++ b/models/Old/senet.py
@ -0,0 +1,121 @@
+'''SENet in PyTorch.
+
+SENet is the winner of ImageNet-2017. The paper is not released yet.
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class BasicBlock(nn.Module):
+    '''Residual block (two 3x3 conv + BN) with squeeze-and-excitation on the residual branch.
+
+    Args:
+        in_planes: input channels.
+        planes: output channels; the SE hidden width is planes // 16.
+        stride: stride of the first conv and of the projection shortcut.
+    '''
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(BasicBlock, self).__init__()
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+
+        # Identity shortcut unless resolution or width changes.
+        self.shortcut = nn.Sequential()
+        if stride != 1 or in_planes != planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(planes)
+            )
+
+        # SE layers
+        self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)  # Use nn.Conv2d instead of nn.Linear
+        self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+
+        # Squeeze: global average over H and W. Adaptive pooling also handles
+        # non-square maps, where a kernel of size out.size(2) would not
+        # reduce the width to 1.
+        w = F.adaptive_avg_pool2d(out, 1)
+        w = F.relu(self.fc1(w))
+        # torch.sigmoid instead of F.sigmoid, which older torch versions flag as deprecated.
+        w = torch.sigmoid(self.fc2(w))
+        # Excitation: per-channel gate broadcast over H and W.
+        out = out * w
+
+        out += self.shortcut(x)
+        out = F.relu(out)
+        return out
+
+
+class PreActBlock(nn.Module):
+    '''Pre-activation residual block (BN -> ReLU -> conv, twice) with squeeze-and-excitation.
+
+    Args:
+        in_planes: input channels.
+        planes: output channels; the SE hidden width is planes // 16.
+        stride: stride of the first conv and of the projection shortcut.
+    '''
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(PreActBlock, self).__init__()
+        self.bn1 = nn.BatchNorm2d(in_planes)
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
+
+        # A projection shortcut exists only when the shape changes; forward
+        # falls back to the raw input otherwise.
+        if stride != 1 or in_planes != planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
+            )
+
+        # SE layers
+        self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
+        self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
+
+    def forward(self, x):
+        out = F.relu(self.bn1(x))
+        # The projection takes the pre-activated input; the identity path takes x itself.
+        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
+        out = self.conv1(out)
+        out = self.conv2(F.relu(self.bn2(out)))
+
+        # Squeeze: global average over H and W. Adaptive pooling also handles
+        # non-square maps, where a kernel of size out.size(2) would not
+        # reduce the width to 1.
+        w = F.adaptive_avg_pool2d(out, 1)
+        w = F.relu(self.fc1(w))
+        # torch.sigmoid instead of F.sigmoid, which older torch versions flag as deprecated.
+        w = torch.sigmoid(self.fc2(w))
+        # Excitation: per-channel gate broadcast over H and W.
+        out = out * w
+
+        out += shortcut
+        return out
+
+
+class SENet(nn.Module):
+    '''SENet: 3x3 stem conv + BN, four residual stages, 4x4 average pooling, linear classifier.
+
+    Args:
+        block: block class, called as block(in_planes, planes, stride).
+        num_blocks: per-stage block counts (indices 0..3 are read).
+        num_classes: size of the classifier output.
+    '''
+
+    def __init__(self, block, num_blocks, num_classes=10):
+        super().__init__()
+        # Channel count fed to the next block; advanced by _make_layer.
+        self.in_planes = 64
+
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        first, second, third, fourth = num_blocks[0], num_blocks[1], num_blocks[2], num_blocks[3]
+        self.layer1 = self._make_layer(block, 64, first, stride=1)
+        self.layer2 = self._make_layer(block, 128, second, stride=2)
+        self.layer3 = self._make_layer(block, 256, third, stride=2)
+        self.layer4 = self._make_layer(block, 512, fourth, stride=2)
+        self.linear = nn.Linear(512, num_classes)
+
+    def _make_layer(self, block, planes, num_blocks, stride):
+        '''Stack blocks into one stage; only the first block uses `stride`.'''
+        per_block_strides = [stride] + [1] * (num_blocks - 1)
+        stage = []
+        for block_stride in per_block_strides:
+            stage.append(block(self.in_planes, planes, block_stride))
+            self.in_planes = planes
+        return nn.Sequential(*stage)
+
+    def forward(self, x):
+        '''Return the classifier output for input batch `x`.'''
+        feats = F.relu(self.bn1(self.conv1(x)))
+        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
+            feats = stage(feats)
+        pooled = F.avg_pool2d(feats, 4)
+        flat = pooled.view(pooled.size(0), -1)
+        return self.linear(flat)
+
+
+def SENet18():
+    '''Build an SENet from PreActBlock with stage depths [2, 2, 2, 2].'''
+    stage_depths = [2, 2, 2, 2]
+    return SENet(PreActBlock, stage_depths)
+
+
+def test():
+    '''Smoke test: run SENet18 on one random 3x32x32 input and print the output size.'''
+    model = SENet18()
+    sample = torch.randn(1, 3, 32, 32)
+    logits = model(sample)
+    print(logits.size())
+
+# test()
--- a/models/Old/shufflenet.py
+++ b/models/Old/shufflenet.py
@ -0,0 +1,109 @@
+'''ShuffleNet in PyTorch.
+
+See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class ShuffleBlock(nn.Module):
+    '''Channel shuffle: interleaves the channels of `groups` contiguous channel groups.'''
+
+    def __init__(self, groups):
+        super().__init__()
+        self.groups = groups
+
+    def forward(self, x):
+        '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]'''
+        batch, channels, height, width = x.size()
+        grouped = x.view(batch, self.groups, channels // self.groups, height, width)
+        # Swapping the group axis with the per-group channel axis interleaves the groups.
+        swapped = grouped.transpose(1, 2)
+        return swapped.reshape(batch, channels, height, width)
+
+
+class Bottleneck(nn.Module):
+    '''ShuffleNet unit: grouped 1x1 conv -> channel shuffle -> depthwise 3x3 conv -> grouped 1x1 conv.
+
+    With stride 2 the input is average-pooled and concatenated to the branch
+    output along the channel axis; otherwise the input is added to it.
+
+    Args:
+        in_planes: input channels.
+        out_planes: channels produced by the conv branch (before any concat).
+        stride: stride of the depthwise conv; 2 selects the pooled, concatenated shortcut.
+        groups: group count of the 1x1 convs (the first one uses a single
+            group when in_planes == 24).
+    '''
+
+    def __init__(self, in_planes, out_planes, stride, groups):
+        super(Bottleneck, self).__init__()
+        self.stride = stride
+
+        # Integer division: layer constructors need int channel counts, and
+        # `/` yields a float on Python 3.
+        mid_planes = out_planes // 4
+        g = 1 if in_planes==24 else groups
+        self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
+        self.bn1 = nn.BatchNorm2d(mid_planes)
+        self.shuffle1 = ShuffleBlock(groups=g)
+        # groups == channels makes this 3x3 conv depthwise.
+        self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
+        self.bn2 = nn.BatchNorm2d(mid_planes)
+        self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
+        self.bn3 = nn.BatchNorm2d(out_planes)
+
+        self.shortcut = nn.Sequential()
+        if stride == 2:
+            self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.shuffle1(out)
+        out = F.relu(self.bn2(self.conv2(out)))
+        out = self.bn3(self.conv3(out))
+        res = self.shortcut(x)
+        # Downsampling units concatenate the shortcut; the others add it.
+        out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
+        return out
+
+
+class ShuffleNet(nn.Module):
+    '''ShuffleNet: 1x1 stem conv + BN, three stages of Bottleneck units, 10-way linear classifier.
+
+    Args:
+        cfg: dict with 'out_planes' (per-stage widths), 'num_blocks'
+            (per-stage unit counts) and 'groups'.
+    '''
+
+    def __init__(self, cfg):
+        super().__init__()
+        stage_widths = cfg['out_planes']
+        stage_depths = cfg['num_blocks']
+        groups = cfg['groups']
+
+        self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(24)
+        # Channel count fed to the next unit; advanced by _make_layer.
+        self.in_planes = 24
+        self.layer1 = self._make_layer(stage_widths[0], stage_depths[0], groups)
+        self.layer2 = self._make_layer(stage_widths[1], stage_depths[1], groups)
+        self.layer3 = self._make_layer(stage_widths[2], stage_depths[2], groups)
+        self.linear = nn.Linear(stage_widths[2], 10)
+
+    def _make_layer(self, out_planes, num_blocks, groups):
+        '''Build one stage: a stride-2 unit followed by stride-1 units.'''
+        units = []
+        for position in range(num_blocks):
+            is_first = position == 0
+            unit_stride = 2 if is_first else 1
+            # The first unit concatenates its input, so its conv branch only
+            # has to produce the remaining channels.
+            branch_planes = out_planes - self.in_planes if is_first else out_planes
+            units.append(Bottleneck(self.in_planes, branch_planes, stride=unit_stride, groups=groups))
+            self.in_planes = out_planes
+        return nn.Sequential(*units)
+
+    def forward(self, x):
+        '''Return the classifier output for input batch `x`.'''
+        feats = F.relu(self.bn1(self.conv1(x)))
+        for stage in (self.layer1, self.layer2, self.layer3):
+            feats = stage(feats)
+        pooled = F.avg_pool2d(feats, 4)
+        flat = pooled.view(pooled.size(0), -1)
+        return self.linear(flat)
+
+
+def ShuffleNetG2():
+    '''Build ShuffleNet with 2 groups and stage widths 200/400/800.'''
+    return ShuffleNet({
+        'out_planes': [200,400,800],
+        'num_blocks': [4,8,4],
+        'groups': 2,
+    })
+
+def ShuffleNetG3():
+    '''Build ShuffleNet with 3 groups and stage widths 240/480/960.'''
+    return ShuffleNet({
+        'out_planes': [240,480,960],
+        'num_blocks': [4,8,4],
+        'groups': 3,
+    })
+
+
+def test():
+    '''Smoke test: run ShuffleNetG2 on one random 3x32x32 input and print the output tensor.'''
+    model = ShuffleNetG2()
+    sample = torch.randn(1, 3, 32, 32)
+    logits = model(sample)
+    print(logits)
+
+# test()
--- a/models/Old/shufflenetv2.py
+++ b/models/Old/shufflenetv2.py
@ -0,0 +1,162 @@
+'''ShuffleNetV2 in PyTorch.
+
+See the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" for more details.
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class ShuffleBlock(nn.Module):
+    '''Channel shuffle: interleaves the channels of `groups` contiguous channel groups.'''
+
+    def __init__(self, groups=2):
+        super().__init__()
+        self.groups = groups
+
+    def forward(self, x):
+        '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]'''
+        batch, channels, height, width = x.size()
+        per_group = channels // self.groups
+        grouped = x.view(batch, self.groups, per_group, height, width)
+        interleaved = grouped.permute(0, 2, 1, 3, 4)
+        return interleaved.reshape(batch, channels, height, width)
+
+
+class SplitBlock(nn.Module):
+    '''Split a batch in two along the channel axis at int(channels * ratio).'''
+
+    def __init__(self, ratio):
+        super().__init__()
+        self.ratio = ratio
+
+    def forward(self, x):
+        '''Return (first int(C*ratio) channels, remaining channels).'''
+        cut = int(x.size(1) * self.ratio)
+        head = x[:, :cut, :, :]
+        tail = x[:, cut:, :, :]
+        return head, tail
+
+
+class BasicBlock(nn.Module):
+    '''ShuffleNetV2 stride-1 unit.
+
+    Splits the channels, passes the second part through 1x1 conv -> depthwise
+    3x3 conv -> 1x1 conv, concatenates it after the untouched first part and
+    shuffles the channels.
+    '''
+
+    def __init__(self, in_channels, split_ratio=0.5):
+        super().__init__()
+        self.split = SplitBlock(split_ratio)
+        # Width of the conv branch.
+        branch = int(in_channels * split_ratio)
+        self.conv1 = nn.Conv2d(branch, branch, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(branch)
+        # groups == channels makes this 3x3 conv depthwise.
+        self.conv2 = nn.Conv2d(branch, branch, kernel_size=3, stride=1,
+                               padding=1, groups=branch, bias=False)
+        self.bn2 = nn.BatchNorm2d(branch)
+        self.conv3 = nn.Conv2d(branch, branch, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(branch)
+        self.shuffle = ShuffleBlock()
+
+    def forward(self, x):
+        kept, processed = self.split(x)
+        processed = F.relu(self.bn1(self.conv1(processed)))
+        # No ReLU after the depthwise conv.
+        processed = self.bn2(self.conv2(processed))
+        processed = F.relu(self.bn3(self.conv3(processed)))
+        merged = torch.cat([kept, processed], 1)
+        return self.shuffle(merged)
+
+
+class DownBlock(nn.Module):
+    '''ShuffleNetV2 downsampling unit.
+
+    Two stride-2 branches, each producing out_channels // 2 channels, are
+    concatenated and channel-shuffled.
+    '''
+
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        half = out_channels // 2
+
+        # left branch: depthwise 3x3 (stride 2) -> 1x1
+        self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2,
+                               padding=1, groups=in_channels, bias=False)
+        self.bn1 = nn.BatchNorm2d(in_channels)
+        self.conv2 = nn.Conv2d(in_channels, half, kernel_size=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(half)
+
+        # right branch: 1x1 -> depthwise 3x3 (stride 2) -> 1x1
+        self.conv3 = nn.Conv2d(in_channels, half, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(half)
+        self.conv4 = nn.Conv2d(half, half, kernel_size=3, stride=2,
+                               padding=1, groups=half, bias=False)
+        self.bn4 = nn.BatchNorm2d(half)
+        self.conv5 = nn.Conv2d(half, half, kernel_size=1, bias=False)
+        self.bn5 = nn.BatchNorm2d(half)
+
+        self.shuffle = ShuffleBlock()
+
+    def forward(self, x):
+        # left branch (no ReLU after the depthwise conv)
+        left = self.bn1(self.conv1(x))
+        left = F.relu(self.bn2(self.conv2(left)))
+
+        # right branch (no ReLU after the depthwise conv)
+        right = F.relu(self.bn3(self.conv3(x)))
+        right = self.bn4(self.conv4(right))
+        right = F.relu(self.bn5(self.conv5(right)))
+
+        merged = torch.cat([left, right], 1)
+        return self.shuffle(merged)
+
+
+class ShuffleNetV2(nn.Module):
+    '''ShuffleNetV2: 3x3 stem conv + BN, three stages, 1x1 conv + BN, 10-way linear classifier.
+
+    Args:
+        net_size: key into the module-level `configs` table.
+    '''
+
+    def __init__(self, net_size):
+        super().__init__()
+        settings = configs[net_size]
+        widths = settings['out_channels']
+        depths = settings['num_blocks']
+
+        self.conv1 = nn.Conv2d(3, 24, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(24)
+        # Channel count fed to the next stage; advanced by _make_layer.
+        self.in_channels = 24
+        self.layer1 = self._make_layer(widths[0], depths[0])
+        self.layer2 = self._make_layer(widths[1], depths[1])
+        self.layer3 = self._make_layer(widths[2], depths[2])
+        self.conv2 = nn.Conv2d(widths[2], widths[3], kernel_size=1,
+                               stride=1, padding=0, bias=False)
+        self.bn2 = nn.BatchNorm2d(widths[3])
+        self.linear = nn.Linear(widths[3], 10)
+
+    def _make_layer(self, out_channels, num_blocks):
+        '''Build one stage: a DownBlock followed by `num_blocks` BasicBlocks.'''
+        stage = [DownBlock(self.in_channels, out_channels)]
+        for _ in range(num_blocks):
+            stage.append(BasicBlock(out_channels))
+        # in_channels only advances when at least one BasicBlock was added.
+        if num_blocks > 0:
+            self.in_channels = out_channels
+        return nn.Sequential(*stage)
+
+    def forward(self, x):
+        '''Return the classifier output for input batch `x`.'''
+        feats = F.relu(self.bn1(self.conv1(x)))
+        # out = F.max_pool2d(out, 3, stride=2, padding=1)
+        for stage in (self.layer1, self.layer2, self.layer3):
+            feats = stage(feats)
+        feats = F.relu(self.bn2(self.conv2(feats)))
+        pooled = F.avg_pool2d(feats, 4)
+        flat = pooled.view(pooled.size(0), -1)
+        return self.linear(flat)
+
+
+# Settings per ShuffleNetV2 variant, keyed by the `net_size` argument:
+#   'out_channels': widths of the three stages, then of the final 1x1 conv
+#   'num_blocks':   BasicBlocks appended after each stage's DownBlock
+configs = {
+    0.5: {
+        'out_channels': (48, 96, 192, 1024),
+        'num_blocks': (3, 7, 3)
+    },
+
+    1: {
+        'out_channels': (116, 232, 464, 1024),
+        'num_blocks': (3, 7, 3)
+    },
+    1.5: {
+        'out_channels': (176, 352, 704, 1024),
+        'num_blocks': (3, 7, 3)
+    },
+    2: {
+        'out_channels': (224, 488, 976, 2048),
+        'num_blocks': (3, 7, 3)
+    }
+}
+
+
+def test():
+    '''Smoke test: run ShuffleNetV2(0.5) on a random 3x3x32x32 batch and print the output shape.'''
+    model = ShuffleNetV2(net_size=0.5)
+    batch = torch.randn(3, 3, 32, 32)
+    logits = model(batch)
+    print(logits.shape)
+
+
+# test()
--- a/models/Old/vgg.py
+++ b/models/Old/vgg.py
@ -0,0 +1,47 @@
+'''VGG11/13/16/19 in Pytorch.'''
+import torch
+import torch.nn as nn
+
+
+# Layer specs per VGG variant, consumed by VGG._make_layers: an int is the
+# out-channel count of a 3x3 conv (followed by BatchNorm and ReLU), 'M' is a
+# 2x2 max-pool with stride 2.
+cfg = {
+    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
+    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
+    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
+    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
+}
+
+
+class VGG(nn.Module):
+    '''VGG feature extractor built from the module-level `cfg` table, plus a 512 -> 10 linear classifier.
+
+    Args:
+        vgg_name: key into `cfg`.
+    '''
+
+    def __init__(self, vgg_name):
+        super().__init__()
+        self.features = self._make_layers(cfg[vgg_name])
+        self.classifier = nn.Linear(512, 10)
+
+    def forward(self, x):
+        '''Return the classifier output for input batch `x`.'''
+        feats = self.features(x)
+        flat = feats.view(feats.size(0), -1)
+        return self.classifier(flat)
+
+    def _make_layers(self, cfg):
+        '''Translate a layer spec list into an nn.Sequential.'''
+        modules = []
+        channels = 3
+        for spec in cfg:
+            if spec == 'M':
+                modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
+                continue
+            modules.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
+            modules.append(nn.BatchNorm2d(spec))
+            modules.append(nn.ReLU(inplace=True))
+            channels = spec
+        # 1x1 average pool with stride 1 closes the feature stack.
+        modules.append(nn.AvgPool2d(kernel_size=1, stride=1))
+        return nn.Sequential(*modules)
+
+
+def test():
+    '''Smoke test: run VGG11 on a random 2x3x32x32 batch and print the output size.'''
+    model = VGG('VGG11')
+    batch = torch.randn(2, 3, 32, 32)
+    logits = model(batch)
+    print(logits.size())
+
+# test()