Mirror of https://github.com/AntoineHX/BU_Stoch_pool.git (synced 2025-05-03 17:20:45 +02:00)

Initial commit
This commit is contained in:
  parent 2ba6dbe7cc
  commit 3de923156c
32 changed files with 4054 additions and 1 deletion
21  LICENSE  Normal file
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2017 liukuang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
34  README.md
@@ -1 +1,33 @@
# BU_Stoch_pool

# Train CIFAR10 with PyTorch

I'm playing with [PyTorch](http://pytorch.org/) on the CIFAR10 dataset.

## Prerequisites
- Python 3.6+
- PyTorch 1.0+

## Accuracy
| Model | Acc. |
| ----------------- | ----------- |
| [VGG16](https://arxiv.org/abs/1409.1556) | 92.64% |
| [ResNet18](https://arxiv.org/abs/1512.03385) | 93.02% |
| [ResNet50](https://arxiv.org/abs/1512.03385) | 93.62% |
| [ResNet101](https://arxiv.org/abs/1512.03385) | 93.75% |
| [RegNetX_200MF](https://arxiv.org/abs/2003.13678) | 94.24% |
| [RegNetY_400MF](https://arxiv.org/abs/2003.13678) | 94.29% |
| [MobileNetV2](https://arxiv.org/abs/1801.04381) | 94.43% |
| [ResNeXt29(32x4d)](https://arxiv.org/abs/1611.05431) | 94.73% |
| [ResNeXt29(2x64d)](https://arxiv.org/abs/1611.05431) | 94.82% |
| [DenseNet121](https://arxiv.org/abs/1608.06993) | 95.04% |
| [PreActResNet18](https://arxiv.org/abs/1603.05027) | 95.11% |
| [DPN92](https://arxiv.org/abs/1707.01629) | 95.16% |

## Learning rate adjustment
I manually change the `lr` during training:
- `0.1` for epoch `[0,150)`
- `0.01` for epoch `[150,250)`
- `0.001` for epoch `[250,350)`

Resume the training with `python main.py --resume --lr=0.01`
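The three-step schedule above can also be driven automatically by a PyTorch scheduler. A minimal sketch (illustrative only, not a file in this commit; the model and optimizer here are placeholders):

import torch

model = torch.nn.Linear(3*32*32, 10)  # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
# lr = 0.1 for epochs [0,150), 0.01 for [150,250), 0.001 for [250,350)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

for epoch in range(350):
    # train_one_epoch(model, optimizer)  # training loop omitted
    scheduler.step()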
26  jobs/test.sh  Normal file
@@ -0,0 +1,26 @@
#!/bin/bash
#SBATCH --gres=gpu:1 #gpu:v100l:1 # https://docs.computecanada.ca/wiki/Using_GPUs_with_Slurm
#SBATCH --cpus-per-task=6 #6 # Cores proportional to GPUs: 6 on Cedar, 16 on Graham.
#SBATCH --mem=32000M #32000M # Memory proportional to CPUs: 32000 Cedar, 64000 Graham.
#SBATCH --account=def-mpederso
#SBATCH --time=1:00:00
#SBATCH --job-name=MyResNet18
#SBATCH --output=log/%x-%j.out
#SBATCH --mail-user=harle.collette.antoine@gmail.com
#SBATCH --mail-type=END
#SBATCH --mail-type=FAIL


# Setup
source ~/dataug/bin/activate

# Execute
# echo $(pwd) = /home/antoh/projects/def-mpederso/antoh/stoch/jobs
cd ../

time python main.py \
    -n MyResNet18 \
    -ep 10 \
    -sc cosine \
    -lr 5e-2 \
    -pf _noCrop_Stoch
351  main.py  Normal file
@@ -0,0 +1,351 @@
'''Train CIFAR10 with PyTorch.'''
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms

import os
import sys
import time
import argparse

from models import *
# from utils import progress_bar


parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('-lr', default=0.1, type=float, help='learning rate')
parser.add_argument('--batch', default=128, type=int, help='batch_size')
parser.add_argument('--epochs', '-ep', default=10, type=int, help='epochs')
parser.add_argument('--scheduler', '-sc', dest='scheduler', default='',
                    help='cosine/multiStep/exponential')
parser.add_argument('--warmup_mul', '-wm', dest='warmup_mul', type=float, default=0,  # 2 # + batch_size => + multiplier
                    help='Warmup multiplier')
parser.add_argument('--warmup_ep', '-we', dest='warmup_ep', type=int, default=5,
                    help='Warmup epochs')
parser.add_argument('--resume', '-r', action='store_true',
                    help='resume from checkpoint')
parser.add_argument('--stoch', '-s', action='store_true',
                    help='use stochastic pooling')
parser.add_argument('--network', '-n', dest='net', default='MyLeNetNormal',
                    help='Network')
parser.add_argument('--res_folder', '-rf', dest='res_folder', default='res/',
                    help='Results destination')
parser.add_argument('--postfix', '-pf', dest='postfix', default='',
                    help='Results postfix')
parser.add_argument('--dataset', '-d', dest='dataset', default='CIFAR10',
                    help='Dataset')
args = parser.parse_args()
print(args)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
checkpoint = False  # set to True to enable checkpoint saving in test()

# Data
print('==> Preparing data..')
dataroot = "~/scratch/data"
download_data = False
transform_train = [
    # transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    # dataset-specific normalization is appended below
]

transform_test = [
    transforms.ToTensor(),
    # dataset-specific normalization is appended below
]

# trainset = torchvision.datasets.CIFAR10(
#     root=dataroot, train=True, download=True, transform=transform_train)
# trainloader = torch.utils.data.DataLoader(
#     trainset, batch_size=args.batch, shuffle=True, num_workers=2)

# testset = torchvision.datasets.CIFAR10(
#     root=dataroot, train=False, download=True, transform=transform_test)
# testloader = torch.utils.data.DataLoader(
#     testset, batch_size=args.batch, shuffle=False, num_workers=2)

# classes = ('plane', 'car', 'bird', 'cat', 'deer',
#            'dog', 'frog', 'horse', 'ship', 'truck')

if args.dataset == 'CIFAR10':  # (32x32 RGB)
    transform_train = transform_train+[transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]
    transform_test = transform_test+[transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]

    trainset = torchvision.datasets.CIFAR10(dataroot, train=True, download=download_data, transform=transforms.Compose(transform_train))
    # data_val = torchvision.datasets.CIFAR10(dataroot, train=True, download=download_data, transform=transforms.Compose(transform))
    testset = torchvision.datasets.CIFAR10(dataroot, train=False, download=download_data, transform=transforms.Compose(transform_test))
elif args.dataset == 'TinyImageNet':  # (Train:100k, Val:5k, Test:5k) (64x64 RGB)
    transform_train = transform_train+[transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]
    transform_test = transform_test+[transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]

    trainset = torchvision.datasets.ImageFolder(os.path.join(dataroot, 'tiny-imagenet-200/train'), transform=transforms.Compose(transform_train))
    # data_val = torchvision.datasets.ImageFolder(os.path.join(dataroot, 'tiny-imagenet-200/val'), transform=transforms.Compose(transform))
    testset = torchvision.datasets.ImageFolder(os.path.join(dataroot, 'tiny-imagenet-200/test'), transform=transforms.Compose(transform_test))
else:
    raise Exception('Unknown dataset')

trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=args.batch, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=args.batch, shuffle=False, num_workers=2)

# Model
print('==> Building model..')
# normal cuda convolution
# net = MyLeNetNormal() #11.3s - 49.4% #2.3GB

# strided convolutions instead of pooling
# net = MyLeNetStride() #5.7s - 41.45% (5 epochs) #0.86GB

# convolution with matrices unfold
# net = MyLeNetMatNormal() #19.6s - 41.3% #1.7GB

# stochastic like fig.2 paper
# net = MyLeNetMatStoch() #16.8s - 41.3% #1.8GB

# stochastic Bottom-Up like fig.3 paper
# net = MyLeNetMatStochBU() #10.5s - 45.3% #1.3GB

net = globals()[args.net]()
print(net)
net = net.to(device)
if device == 'cuda':
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True

log = []
if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    checkpoint = torch.load('./checkpoint/ckpt.pth')
    net.load_state_dict(checkpoint['net'])
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']

    print('WARNING : Log & Lr-Scheduler resuming is not available')

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr,
                      momentum=0.9, weight_decay=5e-4)

# Training
max_grad = 1  # Max gradient norm, limits catastrophic drops
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=max_grad, norm_type=2)  # Prevent exploding gradients
        optimizer.step()

        # Log
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        # progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
        #              % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))

    # if args.net in {'MyLeNetMatNormal', 'MyLeNetMatStoch', 'MyLeNetMatStochBU'}:
    #     print('Comp', net.comp)
    return train_loss/(batch_idx+1), 100.*correct/total


# Deterministic test
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs, stoch=False)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            # progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
            #              % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

    # Save checkpoint.
    acc = 100.*correct/total
    if acc > best_acc:
        if checkpoint:
            print('Saving..')
            state = {
                'net': net.state_dict(),
                'acc': acc,
                'epoch': epoch,
            }
            if not os.path.isdir('checkpoint'):
                os.mkdir('checkpoint')
            torch.save(state, './checkpoint/ckpt.pth')
        best_acc = acc

    return test_loss/(batch_idx+1), acc


# Stochastic test: average the predictions of several stochastic forward passes
def stest(epoch, times=10):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            out = torch.zeros(times, inputs.shape[0], 10, device=device)  # 10 = number of classes
            for l in range(times):
                out[l] = net(inputs, stoch=True)
            outputs = out.mean(0)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            # progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
            #              % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

    # Save checkpoint.
    acc = 100.*correct/total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.pth')
        best_acc = acc


import matplotlib.pyplot as plt
def plot_res(log, fig_name='res'):
    """Save a visual graph of the logs.

    Args:
        log (dict): Logs of the training generated by most of train_utils.
        fig_name (string): Relative path where to save the graph. (default: res)
    """
    epochs = [x["epoch"] for x in log]

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(30, 15))

    ax[0].set_title('Loss')
    ax[0].plot(epochs, [x["train_loss"] for x in log], label='Train')
    ax[0].plot(epochs, [x["test_loss"] for x in log], label='Test')
    ax[0].legend()

    ax[1].set_title('Acc')
    ax[1].plot(epochs, [x["train_acc"] for x in log], label='Train')
    ax[1].plot(epochs, [x["test_acc"] for x in log], label='Test')
    ax[1].legend()

    fig_name = fig_name.replace('.', ',').replace(',,/', '../')
    plt.savefig(fig_name, bbox_inches='tight')
    plt.close()


from warmup_scheduler import GradualWarmupScheduler
def get_scheduler(schedule, epochs, warmup_mul, warmup_ep):
    scheduler = None
    if schedule == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs, eta_min=0.)
    elif schedule == 'multiStep':
        # Multistep milestones inspired by AutoAugment
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         milestones=[int(epochs/3), int(epochs*2/3), int(epochs*2.7/3)],
                                                         gamma=0.1)
    elif schedule == 'exponential':
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: (1 - epoch / epochs) ** 0.9)
    elif not (schedule is None or schedule == ''):
        raise ValueError("Lr scheduler unknown : %s" % schedule)

    # Warmup
    if warmup_mul >= 1:
        scheduler = GradualWarmupScheduler(optimizer,
                                           multiplier=warmup_mul,
                                           total_epoch=warmup_ep,
                                           after_scheduler=scheduler)

    return scheduler


### MAIN ###
print_freq = args.epochs/10
res_folder = args.res_folder
filename = ("{}-{}epochs".format(args.net, start_epoch+args.epochs))+args.postfix
log = []

# Lr-Scheduler
scheduler = get_scheduler(args.scheduler, args.epochs, args.warmup_mul, args.warmup_ep)

print('==> Training model..')
t0 = time.perf_counter()
for epoch in range(start_epoch, start_epoch+args.epochs):

    train_loss, train_acc = train(epoch)
    test_loss, test_acc = test(epoch)

    if scheduler is not None:
        scheduler.step()

    #### Log ####
    log.append({
        "epoch": epoch,
        "train_loss": train_loss,
        "train_acc": train_acc,
        "test_loss": test_loss,
        "test_acc": test_acc,
    })

    ### Print ###
    if print_freq and epoch % print_freq == 0:
        print('-'*9)
        print('\nEpoch: %d' % epoch)
        print("Acc : %.2f / %.2f" % (train_acc, test_acc))
        print("Loss : %.2f / %.2f" % (train_loss, test_loss))

exec_time = time.perf_counter() - t0
print('-'*9)
print('Best Acc : %.2f' % best_acc)
print('Training time (s):', exec_time)


import json
try:
    with open(res_folder+"log/%s.json" % filename, "w+") as f:
        json.dump(log, f, indent=True)
        print('Log :\"', f.name, '\" saved !')
except:
    print("Failed to save logs :", filename)
    print(sys.exc_info()[1])
try:
    plot_res(log, fig_name=res_folder+filename)
    print('Plot :\"', res_folder+filename, '\" saved !')
except:
    print("Failed to plot res")
    print(sys.exc_info()[1])
107  models/Old/densenet.py  Normal file
@@ -0,0 +1,107 @@
'''DenseNet in PyTorch.'''
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
    def __init__(self, in_planes, growth_rate):
        super(Bottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(4*growth_rate)
        self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        out = self.conv1(F.relu(self.bn1(x)))
        out = self.conv2(F.relu(self.bn2(out)))
        out = torch.cat([out,x], 1)
        return out


class Transition(nn.Module):
    def __init__(self, in_planes, out_planes):
        super(Transition, self).__init__()
        self.bn = nn.BatchNorm2d(in_planes)
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)

    def forward(self, x):
        out = self.conv(F.relu(self.bn(x)))
        out = F.avg_pool2d(out, 2)
        return out


class DenseNet(nn.Module):
    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
        super(DenseNet, self).__init__()
        self.growth_rate = growth_rate

        num_planes = 2*growth_rate
        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)

        self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
        num_planes += nblocks[0]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans1 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
        num_planes += nblocks[1]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans2 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
        num_planes += nblocks[2]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans3 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
        num_planes += nblocks[3]*growth_rate

        self.bn = nn.BatchNorm2d(num_planes)
        self.linear = nn.Linear(num_planes, num_classes)

    def _make_dense_layers(self, block, in_planes, nblock):
        layers = []
        for i in range(nblock):
            layers.append(block(in_planes, self.growth_rate))
            in_planes += self.growth_rate
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.trans3(self.dense3(out))
        out = self.dense4(out)
        out = F.avg_pool2d(F.relu(self.bn(out)), 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def DenseNet121():
    return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)

def DenseNet169():
    return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)

def DenseNet201():
    return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)

def DenseNet161():
    return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)

def densenet_cifar():
    return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12)

def test():
    net = densenet_cifar()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y)

# test()
98  models/Old/dpn.py  Normal file
@@ -0,0 +1,98 @@
'''Dual Path Networks in PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
    def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
        super(Bottleneck, self).__init__()
        self.out_planes = out_planes
        self.dense_depth = dense_depth

        self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
        self.bn2 = nn.BatchNorm2d(in_planes)
        self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes+dense_depth)

        self.shortcut = nn.Sequential()
        if first_layer:
            self.shortcut = nn.Sequential(
                nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes+dense_depth)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        x = self.shortcut(x)
        d = self.out_planes
        out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1)
        out = F.relu(out)
        return out


class DPN(nn.Module):
    def __init__(self, cfg):
        super(DPN, self).__init__()
        in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
        num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.last_planes = 64
        self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
        self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
        self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
        self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
        self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10)

    def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for i,stride in enumerate(strides):
            layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0))
            self.last_planes = out_planes + (i+2) * dense_depth
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def DPN26():
    cfg = {
        'in_planes': (96,192,384,768),
        'out_planes': (256,512,1024,2048),
        'num_blocks': (2,2,2,2),
        'dense_depth': (16,32,24,128)
    }
    return DPN(cfg)

def DPN92():
    cfg = {
        'in_planes': (96,192,384,768),
        'out_planes': (256,512,1024,2048),
        'num_blocks': (3,4,20,3),
        'dense_depth': (16,32,24,128)
    }
    return DPN(cfg)


def test():
    net = DPN92()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y)

# test()
175  models/Old/efficientnet.py  Normal file
@@ -0,0 +1,175 @@
'''EfficientNet in PyTorch.

Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks".

Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


def swish(x):
    return x * x.sigmoid()


def drop_connect(x, drop_ratio):
    keep_ratio = 1.0 - drop_ratio
    mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
    mask.bernoulli_(keep_ratio)
    x.div_(keep_ratio)
    x.mul_(mask)
    return x


class SE(nn.Module):
    '''Squeeze-and-Excitation block with Swish.'''

    def __init__(self, in_channels, se_channels):
        super(SE, self).__init__()
        self.se1 = nn.Conv2d(in_channels, se_channels,
                             kernel_size=1, bias=True)
        self.se2 = nn.Conv2d(se_channels, in_channels,
                             kernel_size=1, bias=True)

    def forward(self, x):
        out = F.adaptive_avg_pool2d(x, (1, 1))
        out = swish(self.se1(out))
        out = self.se2(out).sigmoid()
        out = x * out
        return out


class Block(nn.Module):
    '''expansion + depthwise + pointwise + squeeze-excitation'''

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 expand_ratio=1,
                 se_ratio=0.,
                 drop_rate=0.):
        super(Block, self).__init__()
        self.stride = stride
        self.drop_rate = drop_rate
        self.expand_ratio = expand_ratio

        # Expansion
        channels = expand_ratio * in_channels
        self.conv1 = nn.Conv2d(in_channels,
                               channels,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(channels)

        # Depthwise conv
        self.conv2 = nn.Conv2d(channels,
                               channels,
                               kernel_size=kernel_size,
                               stride=stride,
                               padding=(1 if kernel_size == 3 else 2),
                               groups=channels,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(channels)

        # SE layers
        se_channels = int(in_channels * se_ratio)
        self.se = SE(channels, se_channels)

        # Output
        self.conv3 = nn.Conv2d(channels,
                               out_channels,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        # Skip connection if in and out shapes are the same (MV-V2 style)
        self.has_skip = (stride == 1) and (in_channels == out_channels)

    def forward(self, x):
        out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x)))
        out = swish(self.bn2(self.conv2(out)))
        out = self.se(out)
        out = self.bn3(self.conv3(out))
        if self.has_skip:
            if self.training and self.drop_rate > 0:
                out = drop_connect(out, self.drop_rate)
            out = out + x
        return out


class EfficientNet(nn.Module):
    def __init__(self, cfg, num_classes=10):
        super(EfficientNet, self).__init__()
        self.cfg = cfg
        self.conv1 = nn.Conv2d(3,
                               32,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_channels=32)
        self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)

    def _make_layers(self, in_channels):
        layers = []
        cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks', 'kernel_size',
                                     'stride']]
        b = 0
        blocks = sum(self.cfg['num_blocks'])
        for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
            strides = [stride] + [1] * (num_blocks - 1)
            for stride in strides:
                drop_rate = self.cfg['drop_connect_rate'] * b / blocks
                layers.append(
                    Block(in_channels,
                          out_channels,
                          kernel_size,
                          stride,
                          expansion,
                          se_ratio=0.25,
                          drop_rate=drop_rate))
                in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = swish(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        dropout_rate = self.cfg['dropout_rate']
        if self.training and dropout_rate > 0:
            out = F.dropout(out, p=dropout_rate)
        out = self.linear(out)
        return out


def EfficientNetB0():
    cfg = {
        'num_blocks': [1, 2, 2, 3, 3, 4, 1],
        'expansion': [1, 6, 6, 6, 6, 6, 6],
        'out_channels': [16, 24, 40, 80, 112, 192, 320],
        'kernel_size': [3, 3, 5, 3, 5, 5, 3],
        'stride': [1, 2, 2, 2, 1, 2, 1],
        'dropout_rate': 0.2,
        'drop_connect_rate': 0.2,
    }
    return EfficientNet(cfg)


def test():
    net = EfficientNetB0()
    x = torch.randn(2, 3, 32, 32)
    y = net(x)
    print(y.shape)


if __name__ == '__main__':
    test()
107  models/Old/googlenet.py  Normal file
@@ -0,0 +1,107 @@
'''GoogLeNet with PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Inception(nn.Module):
    def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
        super(Inception, self).__init__()
        # 1x1 conv branch
        self.b1 = nn.Sequential(
            nn.Conv2d(in_planes, n1x1, kernel_size=1),
            nn.BatchNorm2d(n1x1),
            nn.ReLU(True),
        )

        # 1x1 conv -> 3x3 conv branch
        self.b2 = nn.Sequential(
            nn.Conv2d(in_planes, n3x3red, kernel_size=1),
            nn.BatchNorm2d(n3x3red),
            nn.ReLU(True),
            nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
            nn.BatchNorm2d(n3x3),
            nn.ReLU(True),
        )

        # 1x1 conv -> 5x5 conv branch
        self.b3 = nn.Sequential(
            nn.Conv2d(in_planes, n5x5red, kernel_size=1),
            nn.BatchNorm2d(n5x5red),
            nn.ReLU(True),
            nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
            nn.BatchNorm2d(n5x5),
            nn.ReLU(True),
            nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
            nn.BatchNorm2d(n5x5),
            nn.ReLU(True),
        )

        # 3x3 pool -> 1x1 conv branch
        self.b4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            nn.Conv2d(in_planes, pool_planes, kernel_size=1),
            nn.BatchNorm2d(pool_planes),
            nn.ReLU(True),
        )

    def forward(self, x):
        y1 = self.b1(x)
        y2 = self.b2(x)
        y3 = self.b3(x)
        y4 = self.b4(x)
        return torch.cat([y1,y2,y3,y4], 1)


class GoogLeNet(nn.Module):
    def __init__(self):
        super(GoogLeNet, self).__init__()
        self.pre_layers = nn.Sequential(
            nn.Conv2d(3, 192, kernel_size=3, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(True),
        )

        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)

        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)

        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)

        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)

        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.linear = nn.Linear(1024, 10)

    def forward(self, x):
        out = self.pre_layers(x)
        out = self.a3(out)
        out = self.b3(out)
        out = self.maxpool(out)
        out = self.a4(out)
        out = self.b4(out)
        out = self.c4(out)
        out = self.d4(out)
        out = self.e4(out)
        out = self.maxpool(out)
        out = self.a5(out)
        out = self.b5(out)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def test():
    net = GoogLeNet()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y.size())

# test()
23  models/Old/lenet.py  Normal file
@@ -0,0 +1,23 @@
'''LeNet in PyTorch.'''
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out
61  models/Old/mobilenet.py  Normal file
@@ -0,0 +1,61 @@
'''MobileNet in PyTorch.

See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications"
for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):
    '''Depthwise conv + Pointwise conv'''
    def __init__(self, in_planes, out_planes, stride=1):
        super(Block, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        return out


class MobileNet(nn.Module):
    # (128,2) means conv planes=128, conv stride=2, by default conv stride=1
    cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]

    def __init__(self, num_classes=10):
        super(MobileNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.linear = nn.Linear(1024, num_classes)

    def _make_layers(self, in_planes):
        layers = []
        for x in self.cfg:
            out_planes = x if isinstance(x, int) else x[0]
            stride = 1 if isinstance(x, int) else x[1]
            layers.append(Block(in_planes, out_planes, stride))
            in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.avg_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def test():
    net = MobileNet()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y.size())

# test()
86  models/Old/mobilenetv2.py  Normal file
@@ -0,0 +1,86 @@
'''MobileNetV2 in PyTorch.

See the paper "Inverted Residuals and Linear Bottlenecks:
Mobile Networks for Classification, Detection and Segmentation" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):
    '''expand + depthwise + pointwise'''
    def __init__(self, in_planes, out_planes, expansion, stride):
        super(Block, self).__init__()
        self.stride = stride

        planes = expansion * in_planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out = out + self.shortcut(x) if self.stride==1 else out
        return out


class MobileNetV2(nn.Module):
    # (expansion, out_planes, num_blocks, stride)
    cfg = [(1, 16, 1, 1),
           (6, 24, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
           (6, 32, 3, 2),
           (6, 64, 4, 2),
           (6, 96, 3, 1),
           (6, 160, 3, 2),
           (6, 320, 1, 1)]

    def __init__(self, num_classes=10):
        super(MobileNetV2, self).__init__()
        # NOTE: change conv1 stride 2 -> 1 for CIFAR10
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)

    def _make_layers(self, in_planes):
        layers = []
        for expansion, out_planes, num_blocks, stride in self.cfg:
            strides = [stride] + [1]*(num_blocks-1)
            for stride in strides:
                layers.append(Block(in_planes, out_planes, expansion, stride))
                in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.relu(self.bn2(self.conv2(out)))
        # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def test():
    net = MobileNetV2()
    x = torch.randn(2,3,32,32)
    y = net(x)
    print(y.size())

# test()
71  models/Old/mylenet.py  Normal file
@@ -0,0 +1,71 @@
'''LeNet in PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F

class MyLeNet(nn.Module):
    def __init__(self):
        super(MyLeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def savg_pool2d(self, x, size):
        # Stochastic average pooling: keep one randomly chosen activation per size x size window.
        b,c,h,w = x.shape
        selh = torch.LongTensor(h//size,w//size).random_(0, size)
        rngh = torch.arange(0,h,size).long().view(h//size,1).repeat(1,w//size).view(h//size,w//size)
        selx = (selh+rngh).repeat(b,c,1,1)

        selw = torch.LongTensor(h//size,w//size).random_(0, size)
        rngw = torch.arange(0,w,size).long().view(1,h//size).repeat(h//size,1).view(h//size,w//size)
        sely = (selw+rngw).repeat(b,c,1,1)
        bv, cv, hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c), torch.arange(0,h//size), torch.arange(0,w//size)])
        #x = x.view(b,c,h*w)
        newx = x[bv,cv, selx, sely]
        return newx

    def ssoftmax_pool2d(self, x, size, idx):
        # Incomplete experiment: `wdataset` is not defined anywhere and this method is never called.
        b,c,h,w = x.shape
        w = wdataset[idx]
        selh = torch.LongTensor(h//size,w//size).random_(0, size)
        rngh = torch.arange(0,h,size).long().view(h//size,1).repeat(1,w//size).view(h//size,w//size)
        selx = (selh+rngh).repeat(b,c,1,1)

        selw = torch.LongTensor(h//size,w//size).random_(0, size)
        rngw = torch.arange(0,w,size).long().view(1,h//size).repeat(h//size,1).view(h//size,w//size)
        sely = (selw+rngw).repeat(b,c,1,1)
        bv, cv, hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c), torch.arange(0,h//size), torch.arange(0,w//size)])
        #x = x.view(b,c,h*w)
        newx = x[bv,cv, selx, sely]
        return newx

    def mavg_pool2d(self, x, size):
        b,c,h,w = x.shape
        #newx = (x[:,:,0::2,0::2]+x[:,:,1::2,0::2]+x[:,:,0::2,1::2]+x[:,:,1::2,1::2])/4
        newx = (x[:,:,0::2,0::2])
        return newx

    def forward(self, x, stoch=True):
        if self.training == False:
            stoch = False
        out = F.relu(self.conv1(x))
        if stoch:
            out = self.savg_pool2d(out, 2)
        else:
            out = F.avg_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        if stoch:
            out = self.savg_pool2d(out, 2)
        else:
            out = F.avg_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out
123  models/Old/mylenet2.py  Normal file
@@ -0,0 +1,123 @@
'''LeNet in PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class MyLeNet2(nn.Module):
    def __init__(self):
        super(MyLeNet2, self).__init__()
        self.conv1 = nn.Conv2d(3, 60, 5)
        self.conv2 = nn.Conv2d(60, 160, 5)
        self.fc1 = nn.Linear(160*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    # Vanilla convolution implemented with unfold + matrix multiplication
    def myconv2d(self, input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
        batch_size, in_channels, in_h, in_w = input.shape
        out_channels, in_channels, kh, kw = weight.shape
        out_h = in_h-2*(kh//2)
        out_w = in_w-2*(kw//2)

        unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=dilation, padding=padding, stride=stride)
        inp_unf = unfold(input)#.view(batch_size,in_channels*kh*kw,out_h,out_w)

        if bias is None:
            out_unf = inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()).transpose(1, 2)
        else:
            out_unf = (inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()) + bias).transpose(1, 2)

        out = out_unf.view(batch_size, out_channels, out_h, out_w)
        return out

    # Unfold-based convolution with stochastic sub-sampling of the output grid
    def myconv2d_avg(self, input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, size=2):
        batch_size, in_channels, in_h, in_w = input.shape
        out_channels, in_channels, kh, kw = weight.shape
        out_h = in_h-2*(kh//2)
        out_w = in_w-2*(kw//2)

        unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=dilation, padding=padding, stride=stride)
        inp_unf = unfold(input).view(batch_size,in_channels*kh*kw,out_h,out_w)
        sel_h = torch.LongTensor(out_h//size,out_w//size).random_(0, size)#.cuda()
        rng_h = sel_h + torch.arange(0,out_h,size).long()#.cuda()

        sel_w = torch.LongTensor(out_h//size,out_w//size).random_(0, size)#.cuda()
        rng_w = sel_w+torch.arange(0,out_w,size).long()#.cuda()
        inp_unf = inp_unf[:,:,rng_h,rng_w].view(batch_size,in_channels*kh*kw,out_h//size*out_w//size)
        #unfold_avg = torch.nn.Unfold(kernel_size=(1, 1), dilation=1, padding=0, stride=2)

        if bias is None:
            out_unf = inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()).transpose(1, 2)
        else:
            out_unf = (inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()) + bias).transpose(1, 2)

        out = out_unf.view(batch_size, out_channels, out_h//size, out_w//size).contiguous()
        return out

    def savg_pool2d(self, x, size):
        # Stochastic average pooling: keep one randomly chosen activation per size x size window.
        b,c,h,w = x.shape
        selh = torch.LongTensor(h//size,w//size).random_(0, size)
        rngh = torch.arange(0,h,size).long().view(h//size,1).repeat(1,w//size).view(h//size,w//size)
        selx = (selh+rngh).repeat(b,c,1,1)

        selw = torch.LongTensor(h//size,w//size).random_(0, size)
        rngw = torch.arange(0,w,size).long().view(1,h//size).repeat(h//size,1).view(h//size,w//size)
        sely = (selw+rngw).repeat(b,c,1,1)
        bv, cv, hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c), torch.arange(0,h//size), torch.arange(0,w//size)])
        #x = x.view(b,c,h*w)
        newx = x[bv,cv, selx, sely]
        return newx

    def ssoftmax_pool2d(self, x, size, idx):
        # Incomplete experiment: `wdataset` is not defined anywhere and this method is never called.
        b,c,h,w = x.shape
        w = wdataset[idx]
        selh = torch.LongTensor(h//size,w//size).random_(0, size)
        rngh = torch.arange(0,h,size).long().view(h//size,1).repeat(1,w//size).view(h//size,w//size)
        selx = (selh+rngh).repeat(b,c,1,1)

        selw = torch.LongTensor(h//size,w//size).random_(0, size)
        rngw = torch.arange(0,w,size).long().view(1,h//size).repeat(h//size,1).view(h//size,w//size)
        sely = (selw+rngw).repeat(b,c,1,1)
        bv, cv, hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c), torch.arange(0,h//size), torch.arange(0,w//size)])
        #x = x.view(b,c,h*w)
        newx = x[bv,cv, selx, sely]
        return newx

    def mavg_pool2d(self, x, size):
        b,c,h,w = x.shape
        #newx = (x[:,:,0::2,0::2]+x[:,:,1::2,0::2]+x[:,:,0::2,1::2]+x[:,:,1::2,1::2])/4
        newx = (x[:,:,0::2,0::2])
        return newx

    def forward(self, x, stoch=True):
        if self.training == False:
            stoch = False
        #out = F.relu(self.conv1(x))
        out = F.relu(self.myconv2d(x, self.conv1.weight, bias=self.conv1.bias))
        if stoch:
            out = self.savg_pool2d(out, 2)
        else:
            out = F.avg_pool2d(out, 2)
        #out = F.relu(self.conv2(out))
        if 0:
            out = F.relu(self.myconv2d_avg(out, self.conv2.weight, bias=self.conv2.bias, size=2))
        else:
            #out = F.relu(self.conv2(out))
            out = F.relu(self.myconv2d(out, self.conv2.weight, bias=self.conv2.bias))
            out = F.avg_pool2d(out, 2)
        #if stoch:
        #    out = self.savg_pool2d(out, 2)
        #else:
        #    out = F.avg_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out
238  models/Old/mylenet3.py  Normal file
@@ -0,0 +1,238 @@
'''LeNet in PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F

import math

from .sconv2davg import SConv2dAvg

class MyLeNetNormal(nn.Module):  # epoch 12s
    def __init__(self):
        super(MyLeNetNormal, self).__init__()
        self.conv1 = nn.Conv2d(3, 200, 5, stride=1)
        self.conv2 = nn.Conv2d(200, 400, 3, stride=1)
        self.conv3 = nn.Conv2d(400, 800, 3, stride=1)
        self.fc1 = nn.Linear(800, 10)

    def forward(self, x, stoch=True):
        _,_,h0,w0 = x.shape
        out = F.relu(self.conv1(x))
        _,_,h1,w1 = out.shape
        out = F.avg_pool2d(out,2,ceil_mode=True)
        out = F.relu(self.conv2(out))
        _,_,h2,w2 = out.shape
        out = F.avg_pool2d(out,2,ceil_mode=True)
        out = F.relu(self.conv3(out))
        out = F.avg_pool2d(out,4,ceil_mode=True)

        out = out.view(out.size(0), -1)
        out = (self.fc1(out))

        return out


def savg_pool2d(x, size, ceil_mode=False):
    # Stochastic average pooling: keep one randomly chosen activation per pooling window.
    b,c,h,w = x.shape
    device = x.device
    if ceil_mode:
        out_h = math.ceil(h/size)
        out_w = math.ceil(w/size)
    else:
        out_h = math.floor(h/size)
        out_w = math.floor(w/size)
    selh = torch.randint(size,(out_h,out_w), device=device)
    #selh[:] = 0
    rngh = torch.arange(0,h,size,device=x.device).view(-1,1)
    selh = selh+rngh

    selw = torch.randint(size,(out_h,out_w), device=device)
    #selw[:] = 0
    rngw = torch.arange(0,w,size,device=x.device)
    selw = selw+rngw

    newx = x[:,:, selh, selw]
    return newx


def savg_pool2d_(x, size, ceil_mode=False):
    b,c,h,w = x.shape
    device = x.device
    selh = torch.randint(size,(math.floor(h/size),math.floor(w/size)), device=device)
    rngh = torch.arange(0,h,size, device=device).long().view(h//size,1).repeat(1,w//size).view(math.floor(h/size),math.floor(w/size))
    selx = (selh+rngh).repeat(b,c,1,1)

    selw = torch.randint(size,(math.floor(h/size),math.floor(w/size)), device=device)
    rngw = torch.arange(0,w,size, device=device).long().view(1,h//size).repeat(h//size,1).view(math.floor(h/size),math.floor(w/size))
    sely = (selw+rngw).repeat(b,c,1,1)
    bv, cv, hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c), torch.arange(0,h//size), torch.arange(0,w//size)])
    #x = x.view(b,c,h*w)
    newx = x[bv,cv, selx, sely]
    return newx


class MyLeNetSimNormal(nn.Module):  # epoch 12s
    def __init__(self):
        super(MyLeNetSimNormal, self).__init__()
        self.conv1 = nn.Conv2d(3, 200, 5, stride=1)
        self.conv2 = nn.Conv2d(200, 400, 3, stride=1)
        self.conv3 = nn.Conv2d(400, 800, 3, stride=1)
        self.fc1 = nn.Linear(800, 10)

    def forward(self, x, stoch=True):

        #stoch = True
        out = F.relu(self.conv1(x))
        if stoch:
            out = savg_pool2d(out,2,ceil_mode=True)
        else:
            out = F.avg_pool2d(out,2,ceil_mode=True)
        out = F.relu(self.conv2(out))
        if stoch:
            out = savg_pool2d(out,2,ceil_mode=True)
        else:
            out = F.avg_pool2d(out,2,ceil_mode=True)
        out = F.relu(self.conv3(out))
        if stoch:
            out = savg_pool2d(out,4,ceil_mode=True)
        else:
            out = F.avg_pool2d(out,4,ceil_mode=True)

        out = out.view(out.size(0), -1)
        out = (self.fc1(out))
        return out


class MyLeNetStride(nn.Module):  # epoch 6s
    def __init__(self):
        super(MyLeNetStride, self).__init__()
        self.conv1 = nn.Conv2d(3, 200, 5, stride=2)
        self.conv2 = nn.Conv2d(200, 400, 3, stride=2)
        self.conv3 = nn.Conv2d(400, 800, 3, stride=4)
        self.fc1 = nn.Linear(800, 10)

    def forward(self, x, stoch=True):

        out = F.relu(self.conv1(x))
        out = F.relu(self.conv2(out))
        out = F.relu(self.conv3(out))

        out = out.view(out.size(0), -1)
        out = (self.fc1(out))
        return out


class MyLeNetMatNormal(nn.Module):  # epoch 21s
    def __init__(self):
        super(MyLeNetMatNormal, self).__init__()
        self.conv1 = SConv2dAvg(3, 200, 5, stride=1)
        self.conv2 = SConv2dAvg(200, 400, 3, stride=1)
        self.conv3 = SConv2dAvg(400, 800, 3, stride=1)
        self.fc1 = nn.Linear(800, 10)

    def forward(self, x, stoch=True):
        _,_,h0,w0 = x.shape
        out = F.relu(self.conv1(x))
        out = F.avg_pool2d(out,2,ceil_mode=True)

        _,_,h1,w1 = out.shape
        out = F.relu(self.conv2(out))
        out = F.avg_pool2d(out,2,ceil_mode=True)

        _,_,h2,w2 = out.shape
        out = F.relu(self.conv3(out))
        out = F.avg_pool2d(out,4,ceil_mode=True)

        out = out.view(out.size(0), -1)
        out = (self.fc1(out))

        if 1:
            comp = 0
            comp += self.conv1.comp(h0,w0)
            comp += self.conv2.comp(h1,w1)
            comp += self.conv3.comp(h2,w2)
            self.comp = comp/1000000
        return out


class MyLeNetMatStoch(nn.Module):  # epoch 17s
    def __init__(self):
        super(MyLeNetMatStoch, self).__init__()
        self.conv1 = SConv2dAvg(3, 200, 5, stride=2, ceil_mode=True)
        self.conv2 = SConv2dAvg(200, 400, 3, stride=2, ceil_mode=True)
        self.conv3 = SConv2dAvg(400, 800, 3, stride=4, ceil_mode=True)
        self.fc1 = nn.Linear(800, 10)

    def forward(self, x, stoch=True):
        # if stoch:
        _,_,h0,w0 = x.shape
        out = F.relu(self.conv1(x,stoch=stoch))
        _,_,h1,w1 = out.shape
        out = F.relu(self.conv2(out,stoch=stoch))
        _,_,h2,w2 = out.shape
        out = F.relu(self.conv3(out,stoch=stoch))
        # else:
        #     out = F.relu(self.conv1(x,stoch=True,stride=1))
        #     out = F.avg_pool2d(out,2,ceil_mode=True)
        #     out = F.relu(self.conv2(out,stoch=True,stride=1))
        #     out = F.avg_pool2d(out,2,ceil_mode=True)
        #     out = F.relu(self.conv3(out,stoch=True,stride=1))
        #     out = F.avg_pool2d(out,4,ceil_mode=True)

        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        # Estimate computation
        if 1:
            comp = 0
            comp += self.conv1.comp(h0,w0)
            comp += self.conv2.comp(h1,w1)
            comp += self.conv3.comp(h2,w2)
            self.comp = comp/1000000
        return out


class MyLeNetMatStochBU(nn.Module):  # epoch 11s
    def __init__(self):
        super(MyLeNetMatStochBU, self).__init__()
        self.conv1 = SConv2dAvg(3, 200, 5, stride=2, ceil_mode=True)
        self.conv2 = SConv2dAvg(200, 400, 3, stride=2, ceil_mode=True)
        self.conv3 = SConv2dAvg(400, 800, 3, stride=4, ceil_mode=True)
        self.fc1 = nn.Linear(800, 10)

    def forward(self, x, stoch=True):
        # get sizes
        h0,w0 = x.shape[2],x.shape[3]
        h1,w1 = self.conv1.get_size(h0,w0)
        h2,w2 = self.conv2.get_size(h1,w1)
        h3,w3 = self.conv3.get_size(h2,w2)
        # print('Shapes :')
        # print('0', h0, w0)
        # print('1', h1, w1)
        # print('2', h2, w2)
        # print('3', h3, w3)
        # sample Bottom-Up
        # mask3 = torch.ones(h3,w3).cuda()
        mask3 = torch.ones((h3,w3), device=x.device)
        selh3,selw3,mask2 = self.conv3.sample(h2,w2,mask=mask3)
        selh2,selw2,mask1 = self.conv2.sample(h1,w1,mask=mask2)
        selh1,selw1,mask0 = self.conv1.sample(h0,w0,mask=mask1)
        # forward
        if stoch:
            out = F.relu(self.conv1(x,selh1,selw1,mask1,stoch=stoch))
            out = F.relu(self.conv2(out,selh2,selw2,mask2,stoch=stoch))
            out = F.relu(self.conv3(out,selh3,selw3,mask3,stoch=stoch))
        else:
            out = F.relu(self.conv1(x,stoch=True,stride=1))
            out = F.avg_pool2d(out,2,ceil_mode=True)
            out = F.relu(self.conv2(out,stoch=True,stride=1))
            out = F.avg_pool2d(out,2,ceil_mode=True)
            out = F.relu(self.conv3(out,stoch=True,stride=1))
            out = F.avg_pool2d(out,4,ceil_mode=True)

        out = out.view(out.size(0), -1)
        out = (self.fc1(out))
        # Estimate computation
        if 1:
            comp = 0
            comp += self.conv1.comp(h0,w0,mask1)
            comp += self.conv2.comp(h1,w1,mask2)
            comp += self.conv3.comp(h2,w2,mask3)
            self.comp = comp.item()/1000000
        return out
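Usage illustration only for the `stoch` flag exposed by these variants. This is a sketch, not part of the commit: it assumes `models/Old` is importable as a package and that the `sconv2davg` helper imported above is present elsewhere in this commit.

import torch
from models.Old.mylenet3 import MyLeNetSimNormal  # import path assumed

net = MyLeNetSimNormal()
x = torch.randn(2, 3, 32, 32)        # CIFAR10-sized dummy batch
with torch.no_grad():
    y_stoch = net(x, stoch=True)     # keeps one random activation per pooling window
    y_det = net(x, stoch=False)      # plain average pooling
print(y_stoch.shape, y_det.shape)    # both torch.Size([2, 10])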
159  models/Old/myresnet.py  Normal file
@@ -0,0 +1,159 @@
'''ResNet in PyTorch.

For Pre-activation ResNet, see 'preact_resnet.py'.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, stoch=False):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)
        self.stoch = stoch

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def savg_pool2d(self, x, size, locx=-1, locy=-1):
        b,c,h,w = x.shape
        if loc==-1:
            selh = torch.LongTensor(h//size,w//size).random_(0, size)
        else:
            selh = torch.ones(h//size,w//size).long()*loc
        rngh = torch.arange(0,h,size).long().view(h//size,1).repeat(1,w//size).view(h//size,w//size)
        selx = (selh+rngh).repeat(b,c,1,1)
        if loc==-1:
selw = torch.LongTensor(h/size,w/size).random_(0, size)
|
||||
else:
|
||||
selw = torch.ones(h/size,w/size).long()*loc
|
||||
rngw = torch.arange(0,w,size).long().view(1,h/size).repeat(h/size,1).view(h/size,w/size)
|
||||
sely = (selw+rngw).repeat(b,c,1,1)
|
||||
bv, cv ,hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c),torch.arange(0,h/size),torch.arange(0,w/size)])
|
||||
#x=x.view(b,c,h*w)
|
||||
newx = x[bv,cv, selx, sely]
|
||||
#ghdh
|
||||
return newx
|
||||
|
||||
def forward(self, x ,stoch = True):
|
||||
#if self.training==False:
|
||||
# stoch=False
|
||||
out = F.relu(self.bn1(self.conv1(x)))
|
||||
out = self.layer1(out)
|
||||
out = self.layer2(out)
|
||||
out = self.layer3(out)
|
||||
out = self.layer4(out)
|
||||
if self.stoch:
|
||||
if stoch:
|
||||
out = self.savg_pool2d(out, 4)
|
||||
else:
|
||||
out = F.avg_pool2d(out, 4)
|
||||
out = out.view(out.size(0), -1)
|
||||
out = self.linear(out)
|
||||
return out
|
||||
|
||||
|
||||
def MyResNet18(stoch=False):
|
||||
return ResNet(BasicBlock, [2, 2, 2, 2],stoch=stoch)
|
||||
|
||||
|
||||
def ResNet34():
|
||||
return ResNet(BasicBlock, [3, 4, 6, 3])
|
||||
|
||||
|
||||
def MyResNet50():
|
||||
return ResNet(Bottleneck, [3, 4, 6, 3])
|
||||
|
||||
|
||||
def ResNet101():
|
||||
return ResNet(Bottleneck, [3, 4, 23, 3])
|
||||
|
||||
|
||||
def ResNet152():
|
||||
return ResNet(Bottleneck, [3, 8, 36, 3])
|
||||
|
||||
|
||||
def test():
|
||||
net = ResNet18()
|
||||
y = net(torch.randn(1, 3, 32, 32))
|
||||
print(y.size())
|
||||
|
||||
# test()
|
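The core idea of savg_pool2d above is to replace the average over each size x size window by a single randomly drawn location, which matches average pooling in expectation. A standalone sketch of that selection (random_pool2d is a hypothetical helper written only for illustration, not part of the repository):

import torch

def random_pool2d(x, size):
    # Keep one random element per non-overlapping size x size window.
    b, c, h, w = x.shape
    selh = torch.randint(0, size, (h // size, w // size), device=x.device)
    selw = torch.randint(0, size, (h // size, w // size), device=x.device)
    rows = selh + torch.arange(0, h, size, device=x.device).view(-1, 1)
    cols = selw + torch.arange(0, w, size, device=x.device).view(1, -1)
    return x[:, :, rows, cols]

print(random_pool2d(torch.randn(2, 512, 4, 4), 4).shape)  # torch.Size([2, 512, 1, 1])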
187
models/Old/myresnet2.py
Normal file
@@ -0,0 +1,187 @@
'''ResNet in PyTorch.

For Pre-activation ResNet, see 'preact_resnet.py'.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, stoch=False):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.conv2 = nn.Conv2d(512, 512, kernel_size=3,
                               stride=1, padding=1, bias=True)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)
        self.stoch = stoch

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def myconv2d_avg(self, input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, size=2):
        # Convolution expressed as unfold + matmul, keeping one random output
        # location per size x size window (stochastic average pooling).
        batch_size, in_channels, in_h, in_w = input.shape
        out_channels, in_channels, kh, kw = weight.shape
        out_h = (in_h+2*padding)-2*(kh//2)
        out_w = (in_w+2*padding)-2*(kw//2)

        unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=dilation, padding=padding, stride=stride)
        inp_unf = unfold(input).view(batch_size, in_channels*kh*kw, out_h, out_w)
        sel_h = torch.LongTensor(out_h//size, out_w//size).random_(0, size)
        rng_h = sel_h + torch.arange(0, out_h, size).long()

        sel_w = torch.LongTensor(out_h//size, out_w//size).random_(0, size)
        rng_w = sel_w + torch.arange(0, out_w, size).long()
        inp_unf = inp_unf[:, :, rng_h, rng_w].view(batch_size, in_channels*kh*kw, out_h//size*out_w//size)

        if bias is None:
            out_unf = inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()).transpose(1, 2)
        else:
            out_unf = (inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()) + bias).transpose(1, 2)

        out = out_unf.view(batch_size, out_channels, out_h//size, out_w//size).contiguous()
        return out

    def savg_pool2d(self, x, size, locx=-1, locy=-1):
        # Stochastic average pooling: keep one randomly chosen location per window.
        b, c, h, w = x.shape
        if locx == -1:
            selh = torch.LongTensor(h//size, w//size).random_(0, size)
        else:
            selh = torch.ones(h//size, w//size).long()*locx
        rngh = torch.arange(0, h, size).long().view(h//size, 1).repeat(1, w//size).view(h//size, w//size)
        selx = (selh+rngh).repeat(b, c, 1, 1)
        if locy == -1:
            selw = torch.LongTensor(h//size, w//size).random_(0, size)
        else:
            selw = torch.ones(h//size, w//size).long()*locy
        rngw = torch.arange(0, w, size).long().view(1, w//size).repeat(h//size, 1).view(h//size, w//size)
        sely = (selw+rngw).repeat(b, c, 1, 1)
        bv, cv, hv, wv = torch.meshgrid([torch.arange(0, b), torch.arange(0, c),
                                         torch.arange(0, h//size), torch.arange(0, w//size)])
        newx = x[bv, cv, selx, sely]
        return newx

    def forward(self, x, stoch=True):
        # if self.training == False:
        #     stoch = False
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        if self.stoch and stoch:
            out = F.relu(self.myconv2d_avg(out, self.conv2.weight, bias=self.conv2.bias, padding=1, size=4))
        else:
            out = F.relu(self.myconv2d_avg(out, self.conv2.weight, bias=self.conv2.bias, padding=1, size=1))
            out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def MyResNet18(stoch=False):
    return ResNet(BasicBlock, [2, 2, 2, 2], stoch=stoch)


def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])


def MyResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])


def ResNet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])


def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])


def test():
    net = MyResNet18()
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())

# test()
125
models/Old/pnasnet.py
Normal file
@@ -0,0 +1,125 @@
'''PNASNet in PyTorch.

Paper: Progressive Neural Architecture Search
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class SepConv(nn.Module):
    '''Separable Convolution.'''
    def __init__(self, in_planes, out_planes, kernel_size, stride):
        super(SepConv, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, out_planes,
                               kernel_size, stride,
                               padding=(kernel_size-1)//2,
                               bias=False, groups=in_planes)
        self.bn1 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        return self.bn1(self.conv1(x))


class CellA(nn.Module):
    def __init__(self, in_planes, out_planes, stride=1):
        super(CellA, self).__init__()
        self.stride = stride
        self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
        if stride == 2:
            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
            self.bn1 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        y1 = self.sep_conv1(x)
        y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
        if self.stride == 2:
            y2 = self.bn1(self.conv1(y2))
        return F.relu(y1+y2)


class CellB(nn.Module):
    def __init__(self, in_planes, out_planes, stride=1):
        super(CellB, self).__init__()
        self.stride = stride
        # Left branch
        self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
        self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride)
        # Right branch
        self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride)
        if stride == 2:
            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
            self.bn1 = nn.BatchNorm2d(out_planes)
        # Reduce channels
        self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        # Left branch
        y1 = self.sep_conv1(x)
        y2 = self.sep_conv2(x)
        # Right branch
        y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
        if self.stride == 2:
            y3 = self.bn1(self.conv1(y3))
        y4 = self.sep_conv3(x)
        # Concat & reduce channels
        b1 = F.relu(y1+y2)
        b2 = F.relu(y3+y4)
        y = torch.cat([b1, b2], 1)
        return F.relu(self.bn2(self.conv2(y)))


class PNASNet(nn.Module):
    def __init__(self, cell_type, num_cells, num_planes):
        super(PNASNet, self).__init__()
        self.in_planes = num_planes
        self.cell_type = cell_type

        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(num_planes)

        self.layer1 = self._make_layer(num_planes, num_cells=6)
        self.layer2 = self._downsample(num_planes*2)
        self.layer3 = self._make_layer(num_planes*2, num_cells=6)
        self.layer4 = self._downsample(num_planes*4)
        self.layer5 = self._make_layer(num_planes*4, num_cells=6)

        self.linear = nn.Linear(num_planes*4, 10)

    def _make_layer(self, planes, num_cells):
        layers = []
        for _ in range(num_cells):
            layers.append(self.cell_type(self.in_planes, planes, stride=1))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def _downsample(self, planes):
        layer = self.cell_type(self.in_planes, planes, stride=2)
        self.in_planes = planes
        return layer

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = F.avg_pool2d(out, 8)
        out = self.linear(out.view(out.size(0), -1))
        return out


def PNASNetA():
    return PNASNet(CellA, num_cells=6, num_planes=44)

def PNASNetB():
    return PNASNet(CellB, num_cells=6, num_planes=32)


def test():
    net = PNASNetB()
    x = torch.randn(1, 3, 32, 32)
    y = net(x)
    print(y)

# test()
118
models/Old/preact_resnet.py
Normal file
@@ -0,0 +1,118 @@
'''Pre-activation ResNet in PyTorch.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Identity Mappings in Deep Residual Networks. arXiv:1603.05027
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class PreActBlock(nn.Module):
    '''Pre-activation version of the BasicBlock.'''
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out += shortcut
        return out


class PreActBottleneck(nn.Module):
    '''Pre-activation version of the original Bottleneck module.'''
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)

        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out = self.conv3(F.relu(self.bn3(out)))
        out += shortcut
        return out


class PreActResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(PreActResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def PreActResNet18():
    return PreActResNet(PreActBlock, [2, 2, 2, 2])

def PreActResNet34():
    return PreActResNet(PreActBlock, [3, 4, 6, 3])

def PreActResNet50():
    return PreActResNet(PreActBottleneck, [3, 4, 6, 3])

def PreActResNet101():
    return PreActResNet(PreActBottleneck, [3, 4, 23, 3])

def PreActResNet152():
    return PreActResNet(PreActBottleneck, [3, 8, 36, 3])


def test():
    net = PreActResNet18()
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())

# test()
155
models/Old/regnet.py
Normal file
@@ -0,0 +1,155 @@
'''RegNet in PyTorch.

Paper: "Designing Network Design Spaces".

Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class SE(nn.Module):
    '''Squeeze-and-Excitation block.'''

    def __init__(self, in_planes, se_planes):
        super(SE, self).__init__()
        self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True)
        self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True)

    def forward(self, x):
        out = F.adaptive_avg_pool2d(x, (1, 1))
        out = F.relu(self.se1(out))
        out = self.se2(out).sigmoid()
        out = x * out
        return out


class Block(nn.Module):
    def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio):
        super(Block, self).__init__()
        # 1x1
        w_b = int(round(w_out * bottleneck_ratio))
        self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(w_b)
        # 3x3
        num_groups = w_b // group_width
        self.conv2 = nn.Conv2d(w_b, w_b, kernel_size=3,
                               stride=stride, padding=1, groups=num_groups, bias=False)
        self.bn2 = nn.BatchNorm2d(w_b)
        # se
        self.with_se = se_ratio > 0
        if self.with_se:
            w_se = int(round(w_in * se_ratio))
            self.se = SE(w_b, w_se)
        # 1x1
        self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(w_out)

        self.shortcut = nn.Sequential()
        if stride != 1 or w_in != w_out:
            self.shortcut = nn.Sequential(
                nn.Conv2d(w_in, w_out,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(w_out)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        if self.with_se:
            out = self.se(out)
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class RegNet(nn.Module):
    def __init__(self, cfg, num_classes=10):
        super(RegNet, self).__init__()
        self.cfg = cfg
        self.in_planes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(0)
        self.layer2 = self._make_layer(1)
        self.layer3 = self._make_layer(2)
        self.layer4 = self._make_layer(3)
        self.linear = nn.Linear(self.cfg['widths'][-1], num_classes)

    def _make_layer(self, idx):
        depth = self.cfg['depths'][idx]
        width = self.cfg['widths'][idx]
        stride = self.cfg['strides'][idx]
        group_width = self.cfg['group_width']
        bottleneck_ratio = self.cfg['bottleneck_ratio']
        se_ratio = self.cfg['se_ratio']

        layers = []
        for i in range(depth):
            s = stride if i == 0 else 1
            layers.append(Block(self.in_planes, width,
                                s, group_width, bottleneck_ratio, se_ratio))
            self.in_planes = width
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.adaptive_avg_pool2d(out, (1, 1))
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def RegNetX_200MF():
    cfg = {
        'depths': [1, 1, 4, 7],
        'widths': [24, 56, 152, 368],
        'strides': [1, 1, 2, 2],
        'group_width': 8,
        'bottleneck_ratio': 1,
        'se_ratio': 0,
    }
    return RegNet(cfg)


def RegNetX_400MF():
    cfg = {
        'depths': [1, 2, 7, 12],
        'widths': [32, 64, 160, 384],
        'strides': [1, 1, 2, 2],
        'group_width': 16,
        'bottleneck_ratio': 1,
        'se_ratio': 0,
    }
    return RegNet(cfg)


def RegNetY_400MF():
    cfg = {
        'depths': [1, 2, 7, 12],
        'widths': [32, 64, 160, 384],
        'strides': [1, 1, 2, 2],
        'group_width': 16,
        'bottleneck_ratio': 1,
        'se_ratio': 0.25,
    }
    return RegNet(cfg)


def test():
    net = RegNetX_200MF()
    print(net)
    x = torch.randn(2, 3, 32, 32)
    y = net(x)
    print(y.shape)


if __name__ == '__main__':
    test()
132
models/Old/resnet.py
Normal file
@@ -0,0 +1,132 @@
'''ResNet in PyTorch.

For Pre-activation ResNet, see 'preact_resnet.py'.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])


def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])


def ResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])


def ResNet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])


def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])


def test():
    net = ResNet18()
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())

# test()
95
models/Old/resnext.py
Normal file
@@ -0,0 +1,95 @@
'''ResNeXt in PyTorch.

See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):
    '''Grouped convolution block.'''
    expansion = 2

    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        super(Block, self).__init__()
        group_width = cardinality * bottleneck_width
        self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(group_width)
        self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(group_width)
        self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*group_width)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*group_width:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*group_width)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNeXt(nn.Module):
    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(num_blocks[0], 1)
        self.layer2 = self._make_layer(num_blocks[1], 2)
        self.layer3 = self._make_layer(num_blocks[2], 2)
        # self.layer4 = self._make_layer(num_blocks[3], 2)
        self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)

    def _make_layer(self, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
        # Increase bottleneck_width by 2 after each stage.
        self.bottleneck_width *= 2
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # out = self.layer4(out)
        out = F.avg_pool2d(out, 8)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNeXt29_2x64d():
    return ResNeXt(num_blocks=[3, 3, 3], cardinality=2, bottleneck_width=64)

def ResNeXt29_4x64d():
    return ResNeXt(num_blocks=[3, 3, 3], cardinality=4, bottleneck_width=64)

def ResNeXt29_8x64d():
    return ResNeXt(num_blocks=[3, 3, 3], cardinality=8, bottleneck_width=64)

def ResNeXt29_32x4d():
    return ResNeXt(num_blocks=[3, 3, 3], cardinality=32, bottleneck_width=4)

def test_resnext():
    net = ResNeXt29_2x64d()
    x = torch.randn(1, 3, 32, 32)
    y = net(x)
    print(y.size())

# test_resnext()
140
models/Old/sconv2davg.py
Normal file
@@ -0,0 +1,140 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

import math


class SConv2dAvg(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, ceil_mode=True):
        super(SConv2dAvg, self).__init__()
        conv = nn.Conv2d(in_channels, out_channels, kernel_size)
        self.deconv = nn.ConvTranspose2d(1, 1, kernel_size, 1, padding=0, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
        nn.init.constant_(self.deconv.weight, 1)
        self.pooldeconv = nn.ConvTranspose2d(1, 1, kernel_size=stride, padding=0, stride=stride, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
        nn.init.constant_(self.pooldeconv.weight, 1)
        self.weight = nn.Parameter(conv.weight)
        self.bias = nn.Parameter(conv.bias)
        self.stride = stride
        self.dilation = dilation
        self.padding = padding
        self.kernel_size = kernel_size
        self.ceil_mode = ceil_mode

    def forward(self, input, selh=-torch.ones(1, 1), selw=-torch.ones(1, 1), mask=-torch.ones(1, 1), stoch=False, stride=-1):
        device = input.device
        if stride == -1:
            stride = self.stride
        if stoch == False:
            stride = 1  # test with real average pooling
        batch_size, in_channels, in_h, in_w = input.shape
        out_channels, in_channels, kh, kw = self.weight.shape

        afterconv_h = in_h-(kh-1)  # size after the convolution, before striding
        afterconv_w = in_w-(kw-1)
        if self.ceil_mode:
            out_h = math.ceil(afterconv_h/stride)
            out_w = math.ceil(afterconv_w/stride)
        else:
            out_h = math.floor(afterconv_h/stride)
            out_w = math.floor(afterconv_w/stride)
        unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=self.dilation, padding=self.padding, stride=1)
        inp_unf = unfold(input)
        if stride != 1:
            inp_unf = inp_unf.view(batch_size, in_channels*kh*kw, afterconv_h, afterconv_w)
            if selh[0, 0] == -1:
                # No selection passed in: draw one location per stride x stride window.
                resth = (out_h*stride)-afterconv_h
                restw = (out_w*stride)-afterconv_w
                selh = torch.randint(stride, (out_h, out_w), device=device)
                selw = torch.randint(stride, (out_h, out_w), device=device)
                if resth != 0:
                    # Case to check: what happens when (stride-resth) == 0?
                    selh[-1, :] = selh[-1, :] % (stride-resth); selh[:, -1] = selh[:, -1] % (stride-restw)
                    selw[-1, :] = selw[-1, :] % (stride-resth); selw[:, -1] = selw[:, -1] % (stride-restw)
            rng_h = selh + torch.arange(0, out_h*stride, stride, device=device).view(-1, 1)
            rng_w = selw + torch.arange(0, out_w*stride, stride, device=device)

            if mask[0, 0] == -1:
                inp_unf = inp_unf[:, :, rng_h, rng_w].view(batch_size, in_channels*kh*kw, -1)
            else:
                inp_unf = inp_unf[:, :, rng_h[mask > 0], rng_w[mask > 0]]

        # Matrix multiplication: convolution on the unfolded input.
        if self.bias is None:
            out_unf = inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2)
        else:
            out_unf = (inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()) + self.bias).transpose(1, 2)

        if stride == 1 or mask[0, 0] == -1:
            out = out_unf.view(batch_size, out_channels, out_h, out_w)  # Fold
            if stoch == False:
                out = F.avg_pool2d(out, self.stride, ceil_mode=True)
        else:
            out = torch.zeros(batch_size, out_channels, out_h, out_w, device=device)
            out[:, :, mask > 0] = out_unf
        return out

    def comp(self, h, w, mask=-torch.ones(1, 1)):
        # Multiply-add count for an input of size h x w, optionally restricted to a mask.
        out_h = (h-(self.kernel_size))/self.stride
        out_w = (w-(self.kernel_size))/self.stride
        if self.ceil_mode:
            out_h = math.ceil(out_h)
            out_w = math.ceil(out_w)
        else:
            out_h = math.floor(out_h)
            out_w = math.floor(out_w)
        if mask[0, 0] == -1:
            comp = self.weight.numel()*out_h*out_w
        else:
            comp = self.weight.numel()*(mask > 0).sum()
        return comp

    def sample(self, h, w, mask):
        '''
        h, w : forward input shape
        mask : mask of output used in computation
        '''
        stride = self.stride
        out_channels, in_channels, kh, kw = self.weight.shape
        device = mask.device

        afterconv_h = h-(kh-1)  # why afterconv? (size after the convolution, before striding)
        afterconv_w = w-(kw-1)
        if self.ceil_mode:
            out_h = math.ceil(afterconv_h/stride)
            out_w = math.ceil(afterconv_w/stride)
        else:
            out_h = math.floor(afterconv_h/stride)
            out_w = math.floor(afterconv_w/stride)
        selh = torch.randint(stride, (out_h, out_w), device=device)
        selw = torch.randint(stride, (out_h, out_w), device=device)

        resth = (out_h*stride)-afterconv_h  # simply equal to stride-1, isn't it?
        restw = (out_w*stride)-afterconv_w
        if resth != 0:
            selh[-1, :] = selh[-1, :] % (stride-resth); selh[:, -1] = selh[:, -1] % (stride-restw)
            selw[-1, :] = selw[-1, :] % (stride-resth); selw[:, -1] = selw[:, -1] % (stride-restw)
        maskh = (out_h)*stride
        maskw = (out_w)*stride
        rng_h = selh + torch.arange(0, out_h*stride, stride, device=device).view(-1, 1)
        rng_w = selw + torch.arange(0, out_w*stride, stride, device=device)
        nmask = torch.zeros((maskh, maskw), device=device)
        nmask[rng_h, rng_w] = 1
        # Propagate the output mask back to the input resolution through the
        # two transposed convolutions (pooling window, then kernel support).
        dmask = self.pooldeconv(mask.float().view(1, 1, mask.shape[0], mask.shape[1]))
        rmask = nmask * dmask
        fmask = self.deconv(rmask)
        fmask = fmask[0, 0]
        return selh, selw, fmask.long()

    def get_size(self, h, w):
        # Standard convolution output-size formula.
        newh = math.floor(((h + 2*self.padding - self.dilation*(self.kernel_size-1) - 1)/self.stride) + 1)
        neww = math.floor(((w + 2*self.padding - self.dilation*(self.kernel_size-1) - 1)/self.stride) + 1)
        return newh, neww
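When no subsampling is involved, the forward above reduces to the standard im2col identity: unfold the input into patches, multiply by the flattened kernels, and fold the result back. A self-contained check of that identity using only plain PyTorch calls (independent of SConv2dAvg):

import torch
import torch.nn.functional as F

x = torch.randn(2, 3, 8, 8)
w = torch.randn(5, 3, 3, 3)
b = torch.randn(5)

cols = F.unfold(x, kernel_size=3)                         # (2, 27, 36): one column per 3x3 patch
out = (cols.transpose(1, 2).matmul(w.view(5, -1).t()) + b).transpose(1, 2)
out = out.view(2, 5, 6, 6)                                # fold back to the output grid

print(torch.allclose(out, F.conv2d(x, w, b), atol=1e-5))  # True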
121
models/Old/senet.py
Normal file
@@ -0,0 +1,121 @@
'''SENet in PyTorch.

SENet is the winner of ImageNet-2017. The paper is not released yet.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

        # SE layers
        self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)  # Use nn.Conv2d instead of nn.Linear
        self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Squeeze
        w = F.avg_pool2d(out, out.size(2))
        w = F.relu(self.fc1(w))
        w = F.sigmoid(self.fc2(w))
        # Excitation
        out = out * w  # New broadcasting feature from v0.2!

        out += self.shortcut(x)
        out = F.relu(out)
        return out


class PreActBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
            )

        # SE layers
        self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
        self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))

        # Squeeze
        w = F.avg_pool2d(out, out.size(2))
        w = F.relu(self.fc1(w))
        w = F.sigmoid(self.fc2(w))
        # Excitation
        out = out * w

        out += shortcut
        return out


class SENet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(SENet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def SENet18():
    return SENet(PreActBlock, [2, 2, 2, 2])


def test():
    net = SENet18()
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())

# test()
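The squeeze-and-excitation step used by both blocks above amounts to a per-channel gate computed from globally pooled features. A minimal shape-only sketch with untrained 1x1 convolutions (the channel counts are illustrative):

import torch
import torch.nn.functional as F

x = torch.randn(1, 64, 8, 8)
fc1 = torch.nn.Conv2d(64, 4, kernel_size=1)    # 64 // 16 channels in the bottleneck
fc2 = torch.nn.Conv2d(4, 64, kernel_size=1)

w = F.adaptive_avg_pool2d(x, 1)                # squeeze: (1, 64, 1, 1)
w = torch.sigmoid(fc2(F.relu(fc1(w))))         # excitation: per-channel gate in (0, 1)
print((x * w).shape)                           # scale: torch.Size([1, 64, 8, 8])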
109
models/Old/shufflenet.py
Normal file
@@ -0,0 +1,109 @@
'''ShuffleNet in PyTorch.

See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class ShuffleBlock(nn.Module):
    def __init__(self, groups):
        super(ShuffleBlock, self).__init__()
        self.groups = groups

    def forward(self, x):
        '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]'''
        N, C, H, W = x.size()
        g = self.groups
        return x.view(N, g, C//g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W)


class Bottleneck(nn.Module):
    def __init__(self, in_planes, out_planes, stride, groups):
        super(Bottleneck, self).__init__()
        self.stride = stride

        mid_planes = out_planes//4  # integer division so Conv2d receives an int channel count
        g = 1 if in_planes == 24 else groups
        self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_planes)
        self.shuffle1 = ShuffleBlock(groups=g)
        self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_planes)
        self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride == 2:
            self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.shuffle1(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        res = self.shortcut(x)
        out = F.relu(torch.cat([out, res], 1)) if self.stride == 2 else F.relu(out+res)
        return out


class ShuffleNet(nn.Module):
    def __init__(self, cfg):
        super(ShuffleNet, self).__init__()
        out_planes = cfg['out_planes']
        num_blocks = cfg['num_blocks']
        groups = cfg['groups']

        self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        self.in_planes = 24
        self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
        self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
        self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
        self.linear = nn.Linear(out_planes[2], 10)

    def _make_layer(self, out_planes, num_blocks, groups):
        layers = []
        for i in range(num_blocks):
            stride = 2 if i == 0 else 1
            cat_planes = self.in_planes if i == 0 else 0
            layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
            self.in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ShuffleNetG2():
    cfg = {
        'out_planes': [200, 400, 800],
        'num_blocks': [4, 8, 4],
        'groups': 2
    }
    return ShuffleNet(cfg)

def ShuffleNetG3():
    cfg = {
        'out_planes': [240, 480, 960],
        'num_blocks': [4, 8, 4],
        'groups': 3
    }
    return ShuffleNet(cfg)


def test():
    net = ShuffleNetG2()
    x = torch.randn(1, 3, 32, 32)
    y = net(x)
    print(y)

# test()
162
models/Old/shufflenetv2.py
Normal file
@@ -0,0 +1,162 @@
'''ShuffleNetV2 in PyTorch.

See the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class ShuffleBlock(nn.Module):
    def __init__(self, groups=2):
        super(ShuffleBlock, self).__init__()
        self.groups = groups

    def forward(self, x):
        '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]'''
        N, C, H, W = x.size()
        g = self.groups
        return x.view(N, g, C//g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W)


class SplitBlock(nn.Module):
    def __init__(self, ratio):
        super(SplitBlock, self).__init__()
        self.ratio = ratio

    def forward(self, x):
        c = int(x.size(1) * self.ratio)
        return x[:, :c, :, :], x[:, c:, :, :]


class BasicBlock(nn.Module):
    def __init__(self, in_channels, split_ratio=0.5):
        super(BasicBlock, self).__init__()
        self.split = SplitBlock(split_ratio)
        in_channels = int(in_channels * split_ratio)
        self.conv1 = nn.Conv2d(in_channels, in_channels,
                               kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv2 = nn.Conv2d(in_channels, in_channels,
                               kernel_size=3, stride=1, padding=1, groups=in_channels, bias=False)
        self.bn2 = nn.BatchNorm2d(in_channels)
        self.conv3 = nn.Conv2d(in_channels, in_channels,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(in_channels)
        self.shuffle = ShuffleBlock()

    def forward(self, x):
        x1, x2 = self.split(x)
        out = F.relu(self.bn1(self.conv1(x2)))
        out = self.bn2(self.conv2(out))
        out = F.relu(self.bn3(self.conv3(out)))
        out = torch.cat([x1, out], 1)
        out = self.shuffle(out)
        return out


class DownBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DownBlock, self).__init__()
        mid_channels = out_channels // 2
        # left
        self.conv1 = nn.Conv2d(in_channels, in_channels,
                               kernel_size=3, stride=2, padding=1, groups=in_channels, bias=False)
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv2 = nn.Conv2d(in_channels, mid_channels,
                               kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_channels)
        # right
        self.conv3 = nn.Conv2d(in_channels, mid_channels,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(mid_channels)
        self.conv4 = nn.Conv2d(mid_channels, mid_channels,
                               kernel_size=3, stride=2, padding=1, groups=mid_channels, bias=False)
        self.bn4 = nn.BatchNorm2d(mid_channels)
        self.conv5 = nn.Conv2d(mid_channels, mid_channels,
                               kernel_size=1, bias=False)
        self.bn5 = nn.BatchNorm2d(mid_channels)

        self.shuffle = ShuffleBlock()

    def forward(self, x):
        # left
        out1 = self.bn1(self.conv1(x))
        out1 = F.relu(self.bn2(self.conv2(out1)))
        # right
        out2 = F.relu(self.bn3(self.conv3(x)))
        out2 = self.bn4(self.conv4(out2))
        out2 = F.relu(self.bn5(self.conv5(out2)))
        # concat
        out = torch.cat([out1, out2], 1)
        out = self.shuffle(out)
        return out


class ShuffleNetV2(nn.Module):
    def __init__(self, net_size):
        super(ShuffleNetV2, self).__init__()
        out_channels = configs[net_size]['out_channels']
        num_blocks = configs[net_size]['num_blocks']

        self.conv1 = nn.Conv2d(3, 24, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        self.in_channels = 24
        self.layer1 = self._make_layer(out_channels[0], num_blocks[0])
        self.layer2 = self._make_layer(out_channels[1], num_blocks[1])
        self.layer3 = self._make_layer(out_channels[2], num_blocks[2])
        self.conv2 = nn.Conv2d(out_channels[2], out_channels[3],
                               kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels[3])
        self.linear = nn.Linear(out_channels[3], 10)

    def _make_layer(self, out_channels, num_blocks):
        layers = [DownBlock(self.in_channels, out_channels)]
        for i in range(num_blocks):
            layers.append(BasicBlock(out_channels))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        # out = F.max_pool2d(out, 3, stride=2, padding=1)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


configs = {
    0.5: {
        'out_channels': (48, 96, 192, 1024),
        'num_blocks': (3, 7, 3)
    },

    1: {
        'out_channels': (116, 232, 464, 1024),
        'num_blocks': (3, 7, 3)
    },
    1.5: {
        'out_channels': (176, 352, 704, 1024),
        'num_blocks': (3, 7, 3)
    },
    2: {
        'out_channels': (224, 488, 976, 2048),
        'num_blocks': (3, 7, 3)
    }
}


def test():
    net = ShuffleNetV2(net_size=0.5)
    x = torch.randn(3, 3, 32, 32)
    y = net(x)
    print(y.shape)


# test()
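The channel shuffle used by ShuffleBlock in both ShuffleNet files is just a view/permute/reshape that interleaves the channel groups, so that the next grouped convolution sees channels from every group. A small demonstration:

import torch

N, C, H, W, g = 1, 6, 2, 2, 2
x = torch.arange(C).view(1, C, 1, 1).repeat(N, 1, H, W)    # channel i holds the value i
shuffled = x.view(N, g, C // g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W)
print(shuffled[0, :, 0, 0].tolist())                       # [0, 3, 1, 4, 2, 5]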
47
models/Old/vgg.py
Normal file
@@ -0,0 +1,47 @@
'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn


cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)


def test():
    net = VGG('VGG11')
    x = torch.randn(2, 3, 32, 32)
    y = net(x)
    print(y.size())

# test()
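To see how a cfg list expands into layers ('M' becomes a 2x2 max pool, an integer becomes Conv2d -> BatchNorm2d -> ReLU with that many output channels), a short sketch with the VGG class above in scope:

net = VGG('VGG11')
convs = [m for m in net.features if isinstance(m, nn.Conv2d)]
print(len(convs))                               # 8 convolutional layers in the 'VGG11' cfg
print(net(torch.randn(2, 3, 32, 32)).shape)     # torch.Size([2, 10])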
2
models/__init__.py
Normal file
@@ -0,0 +1,2 @@
from .mylenet4 import *
from .myresnet3 import *
314
models/mylenet4.py
Normal file
314
models/mylenet4.py
Normal file
|
@ -0,0 +1,314 @@
|
|||
'''LeNet in PyTorch.'''
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from .stoch import SConv2dAvg
|
||||
from .stochsim import savg_pool2d
|
||||
|
||||
class MyLeNetNormal(nn.Module):#epoch 12s
|
||||
def __init__(self):
|
||||
super(MyLeNetNormal, self).__init__()
|
||||
self.conv1 = nn.Conv2d(3, 200, 3, stride=1)
|
||||
self.conv2 = nn.Conv2d(200, 400, 3, stride=1)
|
||||
self.conv3 = nn.Conv2d(400, 800, 3, stride=1)
|
||||
self.conv4 = nn.Conv2d(800, 10, 3, stride=1)
|
||||
#self.fc1 = nn.Linear(800, 10)
|
||||
|
||||
def forward(self, x, stoch=True):
|
||||
|
||||
out = F.relu(self.conv1(x))
|
||||
out = F.avg_pool2d(out,2,ceil_mode=True)
|
||||
out = F.relu(self.conv2(out))
|
||||
out = F.avg_pool2d(out,2,ceil_mode=True)
|
||||
out = F.relu(self.conv3(out))
|
||||
out = F.avg_pool2d(out,2,ceil_mode=True)
|
||||
out = self.conv4(out)
|
||||
#out = F.avg_pool2d(out,2,ceil_mode=True)
|
||||
|
||||
out = out.view(out.size(0), -1 )
|
||||
#out = (self.fc1(out))
|
||||
return out
|
||||
|
||||
class MyLeNetSimNormal(nn.Module):#epoch 12s
|
||||
def __init__(self):
|
||||
super(MyLeNetSimNormal, self).__init__()
|
||||
self.conv1 = nn.Conv2d(3, 200, 3, stride=1)
|
||||
self.conv2 = nn.Conv2d(200, 400, 3, stride=1)
|
||||
self.conv3 = nn.Conv2d(400, 800, 3, stride=1)
|
||||
self.conv4 = nn.Conv2d(800, 10, 3, stride=1)
|
||||
#self.fc1 = nn.Linear(800, 10)
|
||||
|
||||
def forward(self, x, stoch=True):
|
||||
|
||||
out = F.relu(self.conv1(x))
|
||||
# out = self.savg_pool2d(out,2,ceil_mode=True)
|
||||
out = savg_pool2d(out,2, mode='s', ceil_mode=True)
|
||||
out = F.relu(self.conv2(out))
|
||||
# out = self.savg_pool2d(out,2,ceil_mode=True)
|
||||
out = savg_pool2d(out,2, mode='s', ceil_mode=True)
|
||||
out = F.relu(self.conv3(out))
|
||||
# out = self.savg_pool2d(out,2,ceil_mode=True)
|
||||
out = savg_pool2d(out,2, mode='s', ceil_mode=True)
|
||||
out = self.conv4(out)
|
||||
#out = F.avg_pool2d(out,2,ceil_mode=True)
|
||||
|
||||
out = out.view(out.size(0), -1 )
|
||||
#out = (self.fc1(out))
|
||||
return out
|
||||
|
||||
|
||||
class MyLeNetStride(nn.Module):#epoch 6s
|
||||
def __init__(self):
|
||||
super(MyLeNetStride, self).__init__()
|
||||
self.conv1 = nn.Conv2d(3, 200, 3, stride=2)
|
||||
self.conv2 = nn.Conv2d(200, 400, 3, stride=2)
|
||||
self.conv3 = nn.Conv2d(400, 800, 3, stride=2)
|
||||
self.conv4 = nn.Conv2d(800, 10, 3, stride=1)
|
||||
#self.fc1 = nn.Linear(800, 10)
|
||||
|
||||
def forward(self, x, stoch=True):
|
||||
|
||||
out = F.relu(self.conv1(x))
|
||||
out = F.relu(self.conv2(out))
|
||||
out = F.relu(self.conv3(out))
|
||||
out = self.conv4(out)
|
||||
|
||||
out = out.view(out.size(0), -1 )
|
||||
#out = (self.fc1(out))
|
||||
return out
|
||||
|
||||
class MyLeNetMatNormal(nn.Module):#epoch 21s
|
||||
def __init__(self):
|
||||
super(MyLeNetMatNormal, self).__init__()
|
||||
self.conv1 = SConv2dAvg(3, 200, 3, stride=1)
|
||||
self.conv2 = SConv2dAvg(200, 400, 3, stride=1)
|
||||
self.conv3 = SConv2dAvg(400, 800, 3, stride=1)
|
||||
self.conv4 = SConv2dAvg(800, 10, 3, stride=1)
|
||||
#self.fc1 = nn.Linear(800, 10)
|
||||
|
||||
def forward(self, x, stoch=True):
|
||||
out = F.relu(self.conv1(x))
|
||||
out = F.avg_pool2d(out,2,ceil_mode=True)
|
||||
out = F.relu(self.conv2(out))
|
||||
out = F.avg_pool2d(out,2,ceil_mode=True)
|
||||
out = F.relu(self.conv3(out))
|
||||
out = F.avg_pool2d(out,2,ceil_mode=True)
|
||||
out = (self.conv4(out))
|
||||
#out = F.avg_pool2d(out,1,ceil_mode=True)
|
||||
|
||||
out = out.view(out.size(0), -1 )
|
||||
#out = (self.fc1(out))
|
||||
return out
|
||||
|
||||
class MyLeNetMatStoch(nn.Module):#epoch 17s
|
||||
def __init__(self):
|
||||
super(MyLeNetMatStoch, self).__init__()
|
||||
self.conv1 = SConv2dAvg(3, 200, 3, stride=2)
|
||||
self.conv2 = SConv2dAvg(200, 400, 3, stride=2)
|
||||
self.conv3 = SConv2dAvg(400, 800, 3, stride=2)
|
||||
self.conv4 = SConv2dAvg(800, 10, 3, stride=1)
|
||||
#self.fc1 = nn.Linear(800, 10)
|
||||
|
||||
def forward(self, x, stoch=True):
|
||||
print('in',x.shape)
|
||||
out = F.relu(self.conv1(x,stoch=stoch))
|
||||
print('c1',out.shape)
|
||||
out = F.relu(self.conv2(out,stoch=stoch))
|
||||
print('c2', out.shape)
|
||||
out = F.relu(self.conv3(out,stoch=stoch))
|
||||
print('c3',out.shape)
|
||||
#hkjhlg
|
||||
out = self.conv4(out,stoch=stoch)
|
||||
print('c4',out.shape)
|
||||
out = out.view(out.size(0), -1 )
|
||||
#out = self.fc1(out)
|
||||
return out
|
||||
|
||||
class MyLeNetMatStochBU(nn.Module):#epoch 11s
|
||||
def __init__(self):
|
||||
super(MyLeNetMatStochBU, self).__init__()
|
||||
self.conv1 = SConv2dAvg(3, 200, 3, stride=2)
|
||||
self.conv2 = SConv2dAvg(200, 400, 3, stride=2)
|
||||
self.conv3 = SConv2dAvg(400, 800, 3, stride=2, ceil_mode=True)
|
||||
self.conv4 = SConv2dAvg(800, 10, 3, stride=1)
|
||||
# self.fc1 = nn.Linear(800, 10)
|
||||
|
||||
def forward(self, x, stoch=True):
|
||||
#get sizes
|
||||
h0,w0 = x.shape[2],x.shape[3]
|
||||
h1,w1 = self.conv1.get_size(h0,w0)
|
||||
h2,w2 = self.conv2.get_size(h1,w1)
|
||||
h3,w3 = self.conv3.get_size(h2,w2)
|
||||
print(h0,w0)
|
||||
print(h1,w1)
|
||||
print(h2,w2)
|
||||
print(h3,w3)
|
||||
|
||||
#sample BU
|
||||
mask3 = torch.ones(h3,w3).to(x.device)
|
||||
print(mask3.shape)
|
||||
selh3,selw3,mask2 = self.conv3.sample(h2,w2,mask=mask3)
|
||||
print(mask2.shape)
|
||||
selh2,selw2,mask1 = self.conv2.sample(h1,w1,mask=mask2)
|
||||
print(mask1.shape)
|
||||
selh1,selw1,mask0 = self.conv1.sample(h0,w0,mask=mask1)
|
||||
#forward
|
||||
out = F.relu(self.conv1(x,selh1,selw1,mask1,stoch=stoch))
|
||||
out = F.relu(self.conv2(out,selh2,selw2,mask2,stoch=stoch))
|
||||
out = F.relu(self.conv3(out,selh3,selw3,mask3,stoch=stoch))
|
||||
|
||||
out = self.conv4(out,stoch=stoch)
|
||||
out = out.view(out.size(0), -1 )
|
||||
# out = (self.fc1(out))
|
||||
return out
|
||||
|
||||
# class SConv2dAvg(nn.Module):
|
||||
# def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1):
|
||||
# super(SConv2dAvg, self).__init__()
|
||||
# conv = nn.Conv2d(in_channels, out_channels, kernel_size)
|
||||
# self.deconv = nn.ConvTranspose2d(1, 1, kernel_size, 1, padding=0, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
|
||||
# nn.init.constant_(self.deconv.weight, 1)
|
||||
# self.pooldeconv = nn.ConvTranspose2d(1, 1, kernel_size=stride,padding=0,stride=stride, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
|
||||
# nn.init.constant_(self.pooldeconv.weight, 1)
|
||||
# self.weight = nn.Parameter(conv.weight)
|
||||
# self.bias = nn.Parameter(conv.bias)
|
||||
# self.stride = stride
|
||||
# self.dilation = dilation
|
||||
# self.padding = padding
|
||||
# self.kernel_size = kernel_size
|
||||
|
||||
# def forward(self, input, selh=-torch.ones(1,1), selw=-torch.ones(1,1), mask=-torch.ones(1,1),stoch=True):
|
||||
# stride = self.stride
|
||||
# if stoch==False:
|
||||
# stride=1
|
||||
# batch_size, in_channels, in_h, in_w = input.shape
|
||||
# out_channels, in_channels, kh, kw = self.weight.shape
|
||||
# afterconv_h = in_h-(kh-1)
|
||||
# afterconv_w = in_w-(kw-1)
|
||||
# out_h = int((afterconv_h+stride-1)/stride)
|
||||
# out_w = int((afterconv_w+stride-1)/stride)
|
||||
|
||||
# unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=self.dilation, padding=self.padding, stride=1)
|
||||
# inp_unf = unfold(input)
|
||||
# if stride!=1:
|
||||
# inp_unf = inp_unf.view(batch_size,in_channels*kh*kw,afterconv_h,afterconv_w)
|
||||
# if selh[0,0]==-1:
|
||||
# resth = (out_h*stride)-afterconv_h
|
||||
# restw = (out_w*stride)-afterconv_w
|
||||
# selh = torch.cuda.LongTensor(out_h,out_w).random_(0, stride)
|
||||
# selw = torch.cuda.LongTensor(out_h,out_w).random_(0, stride)
|
||||
# #if resth!=0:
|
||||
# # selh[-1,:]=selh[-1,:]%(stride-resth);selh[:,-1]=selh[:,-1]%(stride-restw)
|
||||
# # selw[-1,:]=selw[-1,:]%(stride-resth);selw[:,-1]=selw[:,-1]%(stride-restw)
|
||||
# #if mask[0,0]==-1
|
||||
# # mask = torch.ones(out_h,out_w,device=torch.device('cuda'))
|
||||
# rng_h = selh + torch.arange(0,out_h*stride,stride,device=torch.device('cuda')).view(-1,1)
|
||||
# rng_w = selw + torch.arange(0,out_w*stride,stride,device=torch.device('cuda'))
|
||||
# if mask[0,0]==-1:
|
||||
# inp_unf = inp_unf[:,:,rng_h,rng_w].view(batch_size,in_channels*kh*kw,-1)
|
||||
# else:
|
||||
# inp_unf = inp_unf[:,:,rng_h[mask>0],rng_w[mask>0]]
|
||||
|
||||
# if self.bias is None:
|
||||
# out_unf = inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2)
|
||||
# else:
|
||||
# out_unf = (inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()) + self.bias).transpose(1, 2)
|
||||
|
||||
# if stride==1 or mask[0,0]==-1:
|
||||
# out = out_unf.view(batch_size,out_channels,out_h,out_w)
|
||||
# if stoch==False:
|
||||
# out = F.avg_pool2d(out,self.stride,ceil_mode=True)
|
||||
# else:
|
||||
# out = torch.zeros(batch_size, out_channels,out_h,out_w,device=torch.device('cuda'))
|
||||
# out[:,:,mask>0] = out_unf
|
||||
# return out
|
||||
|
||||
# def forward_(self, input, selh=-torch.ones(1,1), selw=-torch.ones(1,1), mask=-torch.ones(1,1),stoch=True):
|
||||
# stride = self.stride
|
||||
# if stoch==False:
|
||||
# stride=1
|
||||
# batch_size, in_channels, in_h, in_w = input.shape
|
||||
# out_channels, in_channels, kh, kw = self.weight.shape
|
||||
# afterconv_h = in_h-(kh-1)
|
||||
# afterconv_w = in_w-(kw-1)
|
||||
# out_h = (afterconv_h+stride-1)/stride
|
||||
# out_w = (afterconv_w+stride-1)/stride
|
||||
|
||||
# unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=self.dilation, padding=self.padding, stride=1)
|
||||
# inp_unf = unfold(input)
|
||||
|
||||
# if self.bias is None:
|
||||
# out_unf = inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2)
|
||||
# else:
|
||||
# out_unf = (inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()) + self.bias).transpose(1, 2)
|
||||
|
||||
# out = out_unf.view(batch_size,out_channels,afterconv_h,afterconv_w)
|
||||
# if stoch==False:
|
||||
# out = F.avg_pool2d(out,self.stride,ceil_mode=True)
|
||||
# return out
|
||||
|
||||
# def sample(self,h,w,mask):
|
||||
# out_channels, in_channels, kh, kw = self.weight.shape
|
||||
# afterconv_h = h-(kh-1)
|
||||
# afterconv_w = w-(kw-1)
|
||||
# out_h = (afterconv_h+self.stride-1)/self.stride
|
||||
# out_w = (afterconv_w+self.stride-1)/self.stride
|
||||
# selh = torch.cuda.LongTensor(out_h,out_w).random_(0, self.stride)
|
||||
# selw = torch.cuda.LongTensor(out_h,out_w).random_(0, self.stride)
|
||||
# resth = (out_h*self.stride)-afterconv_h
|
||||
# restw = (out_w*self.stride)-afterconv_w
|
||||
# #print(resth)
|
||||
# #if resth!=0:
|
||||
# # selh[-1,:]=selh[-1,:]%(self.stride-resth);selh[:,-1]=selh[:,-1]%(self.stride-restw)
|
||||
# # selw[-1,:]=selw[-1,:]%(self.stride-resth);selw[:,-1]=selw[:,-1]%(self.stride-restw)
|
||||
# maskh = (out_h)*self.stride#-resth#+self.kernel_size-1
|
||||
# maskw = (out_w)*self.stride#-restw#+self.kernel_size-1
|
||||
# rng_h = selh + torch.arange(0,out_h*self.stride,self.stride,device=torch.device('cuda')).view(-1,1)
|
||||
# rng_w = selw + torch.arange(0,out_w*self.stride,self.stride,device=torch.device('cuda'))
|
||||
# nmask = torch.zeros((maskh,maskw),device=torch.device('cuda'))
|
||||
# nmask[rng_h,rng_w] = 1
|
||||
# #rmask = mask * nmask
|
||||
# dmask = self.pooldeconv(mask.float().view(1,1,mask.shape[0],mask.shape[1]))
|
||||
# rmask = nmask * dmask
|
||||
# #rmask = rmask[:,:,:out_h,:out_w]
|
||||
# fmask = self.deconv(rmask)
|
||||
# fmask = fmask[0,0]
|
||||
# return selh,selw,fmask.long()
|
||||
|
||||
# def get_size(self,h,w):
|
||||
# newh=(h-(self.kernel_size-1)+(self.stride-1))/self.stride
|
||||
# neww=(w-(self.kernel_size-1)+(self.stride-1))/self.stride
|
||||
# return newh,neww
|
||||
|
||||
|
||||
# def savg_pool2d(x,size,ceil_mode=False):
|
||||
# b,c,h,w = x.shape
|
||||
# selh = torch.LongTensor(h/size,w/size).random_(0, size)
|
||||
# rngh = torch.arange(0,h,size).long().view(h/size,1).repeat(1,w/size).view(h/size,w/size)
|
||||
# selx = (selh+rngh).repeat(b,c,1,1)
|
||||
|
||||
# selw = torch.LongTensor(h/size,w/size).random_(0, size)
|
||||
# rngw = torch.arange(0,w,size).long().view(1,h/size).repeat(h/size,1).view(h/size,w/size)
|
||||
# sely = (selw+rngw).repeat(b,c,1,1)
|
||||
# bv, cv ,hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c),torch.arange(0,h/size),torch.arange(0,w/size)])
|
||||
# #x=x.view(b,c,h*w)
|
||||
# newx = x[bv,cv, selx, sely]
|
||||
# #ghdh
|
||||
# return newx
|
||||
|
||||
# def savg_pool2d_(x,size,ceil_mode=False):
|
||||
# b,c,h,w = x.shape
|
||||
# selh = torch.cuda.LongTensor(h/size,w/size).random_(0, size)
|
||||
# rngh = torch.arange(0,h,size,device=torch.device('cuda')).view(-1,1)
|
||||
# selx = selh+rngh
|
||||
|
||||
# selw = torch.cuda.LongTensor(h/size,w/size).random_(0, size)
|
||||
# rngw = torch.arange(0,w,size,device=torch.device('cuda'))
|
||||
# sely = selw+rngw
|
||||
|
||||
# #bv, cv ,hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c),torch.arange(0,h/size),torch.arange(0,w/size)])
|
||||
# #x=x.view(b,c,h*w)
|
||||
# newx = x[:,:, selx, sely]
|
||||
# #ghdh
|
||||
# return newx
|
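The LeNet variants above compare plain conv + average pooling (MyLeNetNormal), simulated stochastic pooling via savg_pool2d (MyLeNetSimNormal), strided convolutions (MyLeNetStride), and the matrix-multiplication stochastic convolution SConv2dAvg, with MyLeNetMatStochBU additionally propagating masks bottom-up so each layer only computes the locations its successor will sample. A minimal smoke test of the matrix variant (a sketch; the forward pass still prints its intermediate shapes, and the BU variant remains experimental):

import torch
from models.mylenet4 import MyLeNetMatStoch  # assumed import path

net = MyLeNetMatStoch()
x = torch.randn(2, 3, 32, 32)   # CIFAR-sized input
y = net(x, stoch=True)          # one sampled location per stride x stride region
print(y.shape)                  # torch.Size([2, 10])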
167
models/myresnet3.py
Normal file
|
@@ -0,0 +1,167 @@
|
|||
'''ResNet in PyTorch.
|
||||
|
||||
For Pre-activation ResNet, see 'preact_resnet.py'.
|
||||
|
||||
Reference:
|
||||
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||
Deep Residual Learning for Image Recognition. arXiv:1512.03385
|
||||
'''
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from .sconv2davg import SConv2dAvg
|
||||
|
||||
class BasicBlock(nn.Module):
|
||||
expansion = 1
|
||||
|
||||
def __init__(self, in_planes, planes, stride=1, stoch=False):
|
||||
super(BasicBlock, self).__init__()
|
||||
self.conv1 = nn.Conv2d(
|
||||
in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(planes)
|
||||
|
||||
self.stoch=stoch
|
||||
if stoch:
|
||||
self.conv2 = SConv2dAvg(planes, planes, kernel_size=3,
|
||||
stride=1, padding=1) #bias=False) #Bias !?
|
||||
else :
|
||||
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
|
||||
stride=1, padding=1, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(planes)
|
||||
|
||||
self.shortcut = nn.Sequential()
|
||||
if stride != 1 or in_planes != self.expansion*planes:
|
||||
self.shortcut = nn.Sequential(
|
||||
nn.Conv2d(in_planes, self.expansion*planes,
|
||||
kernel_size=1, stride=stride, bias=False),
|
||||
nn.BatchNorm2d(self.expansion*planes)
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
out = F.relu(self.bn1(self.conv1(x)))
|
||||
if self.stoch:
|
||||
_,_,h1,w1=out.shape
|
||||
h2,w2 = self.conv2.get_size(h1,w1)
|
||||
mask2 = torch.ones((h2,w2), device=x.device)
|
||||
selh2,selw2,mask1 = self.conv2.sample(h1,w1,mask=mask2)
|
||||
out = self.bn2(self.conv2(out,selh2,selw2,mask2,stoch=self.stoch))
|
||||
else:
|
||||
out = self.bn2(self.conv2(out))
|
||||
out += self.shortcut(x)
|
||||
out = F.relu(out)
|
||||
return out
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
|
||||
expansion = 4
|
||||
|
||||
def __init__(self, in_planes, planes, stride=1):
|
||||
super(Bottleneck, self).__init__()
|
||||
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(planes)
|
||||
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
|
||||
stride=stride, padding=1, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(planes)
|
||||
self.conv3 = nn.Conv2d(planes, self.expansion *
|
||||
planes, kernel_size=1, bias=False)
|
||||
self.bn3 = nn.BatchNorm2d(self.expansion*planes)
|
||||
|
||||
self.shortcut = nn.Sequential()
|
||||
if stride != 1 or in_planes != self.expansion*planes:
|
||||
self.shortcut = nn.Sequential(
|
||||
nn.Conv2d(in_planes, self.expansion*planes,
|
||||
kernel_size=1, stride=stride, bias=False),
|
||||
nn.BatchNorm2d(self.expansion*planes)
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
out = F.relu(self.bn1(self.conv1(x)))
|
||||
out = F.relu(self.bn2(self.conv2(out)))
|
||||
out = self.bn3(self.conv3(out))
|
||||
out += self.shortcut(x)
|
||||
out = F.relu(out)
|
||||
return out
|
||||
|
||||
|
||||
class ResNet(nn.Module):
|
||||
def __init__(self, block, num_blocks, num_classes=10,stoch=False):
|
||||
super(ResNet, self).__init__()
|
||||
self.in_planes = 64
|
||||
|
||||
self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
|
||||
stride=1, padding=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(64)
|
||||
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
|
||||
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
|
||||
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
|
||||
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, stoch=stoch)
|
||||
self.linear = nn.Linear(512*block.expansion, num_classes)
|
||||
self.stoch = stoch
|
||||
|
||||
# if self.stoch:
|
||||
# old_conv = self.layer4[-1].conv2
|
||||
# self.layer4[-1].conv2=SConv2dAvg(old_conv.weight.shape[0],
|
||||
# old_conv.weight.shape[1],
|
||||
# old_conv.kernel_size,
|
||||
# stride=4)#old_conv.stride[0]) #Bias !?
|
||||
|
||||
def _make_layer(self, block, planes, num_blocks, stride, stoch=False):
|
||||
strides = [stride] + [1]*(num_blocks-1)
|
||||
layers = []
|
||||
for stride in strides:
|
||||
layers.append(block(self.in_planes, planes, stride))
|
||||
self.in_planes = planes * block.expansion
|
||||
if stoch:
|
||||
layers[-1]=block(self.in_planes, planes, stride, stoch=True)
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
def forward(self, x , stoch = False):
|
||||
#if self.training==False:
|
||||
# stoch=False
|
||||
print(stoch)
|
||||
# self.layer1.stoch=stoch
|
||||
# self.layer2.stoch=stoch
|
||||
# self.layer3.stoch=stoch
|
||||
self.layer4[-1].stoch=stoch
|
||||
|
||||
out = F.relu(self.bn1(self.conv1(x)))
|
||||
out = self.layer1(out)
|
||||
out = self.layer2(out)
|
||||
out = self.layer3(out)
|
||||
out = self.layer4(out)
|
||||
|
||||
# print(out.shape)
|
||||
out = F.avg_pool2d(out, 4)
|
||||
# print(out.shape)
|
||||
out = out.view(out.size(0), -1)
|
||||
out = self.linear(out)
|
||||
return out
|
||||
|
||||
|
||||
def MyResNet18(stoch=True):
|
||||
return ResNet(BasicBlock, [2, 2, 2, 2],stoch=stoch)
|
||||
|
||||
|
||||
def ResNet34():
|
||||
return ResNet(BasicBlock, [3, 4, 6, 3])
|
||||
|
||||
|
||||
def MyResNet50():
|
||||
return ResNet(Bottleneck, [3, 4, 6, 3])
|
||||
|
||||
|
||||
def ResNet101():
|
||||
return ResNet(Bottleneck, [3, 4, 23, 3])
|
||||
|
||||
|
||||
def ResNet152():
|
||||
return ResNet(Bottleneck, [3, 8, 36, 3])
|
||||
|
||||
|
||||
def test():
|
||||
net = MyResNet18()
|
||||
y = net(torch.randn(1, 3, 32, 32))
|
||||
print(y.size())
|
||||
|
||||
# test()
|
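MyResNet18 above is the standard CIFAR ResNet-18 except that, when stoch=True, the second convolution of the last BasicBlock in layer4 is replaced by SConv2dAvg; the stochastic path is then toggled per call with forward(x, stoch=True). A minimal sketch of the deterministic path (the constructor defaults to stoch=True, which only swaps the module in; forward currently also prints the stoch flag):

import torch
from models.myresnet3 import MyResNet18  # assumed import path

net = MyResNet18()
x = torch.randn(2, 3, 32, 32)
y = net(x)          # forward defaults to stoch=False, so the swapped conv runs densely
print(y.shape)      # torch.Size([2, 10])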
230
models/stoch.py
Normal file
|
@@ -0,0 +1,230 @@
|
|||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
import math
|
||||
|
||||
class SConv2dStride(nn.Module):
|
||||
def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1,ceil_mode=True,bias=False):
|
||||
super(SConv2dStride, self).__init__()
|
||||
self.conv = nn.Conv2d(in_channels, out_channels, kernel_size , stride=stride, padding=padding,dilation=dilation,bias=bias)
|
||||
self.stride = stride
|
||||
self.ceil_mode = ceil_mode
|
||||
|
||||
def forward(self, x,stoch = True):
|
||||
stoch=True #for some reason average does not work...
|
||||
if stoch:
|
||||
device= x.device
|
||||
selh = torch.randint(self.conv.stride[0],(1,), device=device)[0]
|
||||
selw = torch.randint(self.conv.stride[1],(1,), device=device)[0]
|
||||
out = self.conv(x[:,:,selh:,selw:])
|
||||
else:
|
||||
self.conv.stride = (1,1)
|
||||
out = self.conv(x)
|
||||
out = F.avg_pool2d(out,self.stride,ceil_mode=self.ceil_mode)
|
||||
self.conv.stride = (self.stride,self.stride)
|
||||
return out
|
||||
|
||||
class SConv2dAvg(nn.Module):
|
||||
def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1,ceil_mode=True, bias = True):
|
||||
super(SConv2dAvg, self).__init__()
|
||||
conv = nn.Conv2d(in_channels, out_channels, kernel_size)
|
||||
self.deconv = nn.ConvTranspose2d(1, 1, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
|
||||
nn.init.constant_(self.deconv.weight, 1)
|
||||
self.pooldeconv = nn.ConvTranspose2d(1, 1, kernel_size=stride,padding=0,stride=stride, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
|
||||
nn.init.constant_(self.pooldeconv.weight, 1)
|
||||
self.weight = nn.Parameter(conv.weight)
|
||||
if bias:
|
||||
self.bias = nn.Parameter(conv.bias)
|
||||
else:
|
||||
self.bias = None
|
||||
self.stride = stride
|
||||
self.dilation = dilation
|
||||
self.padding = padding
|
||||
self.kernel_size = kernel_size
|
||||
self.ceil_mode = ceil_mode
|
||||
|
||||
def forward(self, input, selh=-torch.ones(1,1), selw=-torch.ones(1,1), mask=-torch.ones(1,1),stoch=True,stride=-1):
|
||||
device=input.device
|
||||
if stride==-1:
|
||||
stride = self.stride #if stride not defined use self.stride
|
||||
if stoch==False:
|
||||
stride=1 #test with real average pooling
|
||||
batch_size, in_channels, in_h, in_w = input.shape
|
||||
out_channels, in_channels, kh, kw = self.weight.shape
|
||||
|
||||
afterconv_h = in_h+2*self.padding-(kh-1) #size after conv
|
||||
afterconv_w = in_w+2*self.padding-(kw-1)
|
||||
if self.ceil_mode: #ceil_mode = true default mode for strided conv
|
||||
out_h = math.ceil(afterconv_h/stride)
|
||||
out_w = math.ceil(afterconv_w/stride)
|
||||
else: #ceil_mode = false default mode for pooling
|
||||
out_h = math.floor(afterconv_h/stride)
|
||||
out_w = math.floor(afterconv_w/stride)
|
||||
unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=self.dilation, padding=self.padding, stride=1)
|
||||
inp_unf = unfold(input) #transform into a matrix (batch_size, in_channels*kh*kw,afterconv_h,afterconv_w)
|
||||
if stride!=1: # if stride==1 there is no pooling
|
||||
inp_unf = inp_unf.view(batch_size,in_channels*kh*kw,afterconv_h,afterconv_w)
|
||||
if selh[0,0]==-1: # if not given sampled selection
|
||||
#selction of where to sample for each pooling location
|
||||
selh = torch.randint(stride,(out_h,out_w), device=device)
|
||||
selw = torch.randint(stride,(out_h,out_w), device=device)
|
||||
|
||||
resth = (out_h*stride)-afterconv_h
|
||||
restw = (out_w*stride)-afterconv_w
|
||||
if resth!=0 and self.ceil_mode: #in case of ceil_mode need to select only the good locations for the last regions
|
||||
selh[-1,:]=selh[-1,:]%(stride-resth);selh[:,-1]=selh[:,-1]%(stride-restw)
|
||||
selw[-1,:]=selw[-1,:]%(stride-resth);selw[:,-1]=selw[:,-1]%(stride-restw)
|
||||
#the postion should be global by adding range...
|
||||
rng_h = selh + torch.arange(0,out_h*stride,stride,device=device).view(-1,1)
|
||||
rng_w = selw + torch.arange(0,out_w*stride,stride,device=device)
|
||||
|
||||
if mask[0,0]==-1:# in case of not given mask use only sampled selection
|
||||
inp_unf = inp_unf[:,:,rng_h,rng_w].view(batch_size,in_channels*kh*kw,-1)
|
||||
else:#in case of a valid mask use selection only on the mask locations
|
||||
inp_unf = inp_unf[:,:,rng_h[mask>0],rng_w[mask>0]]
|
||||
|
||||
#Matrix mul
|
||||
if self.bias is None:
|
||||
out_unf = inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2)
|
||||
else:
|
||||
out_unf = (inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()) + self.bias).transpose(1, 2)
|
||||
|
||||
if stride==1 or mask[0,0]==-1:# in case of no mask and stride==1
|
||||
out = out_unf.view(batch_size,out_channels,out_h,out_w) #Fold
|
||||
#if stoch==False: #this is done outside for more clarity
|
||||
# out = F.avg_pool2d(out,self.stride,ceil_mode=True)
|
||||
else:#in case of mask
|
||||
out = torch.zeros(batch_size, out_channels,out_h,out_w,device=device)
|
||||
out[:,:,mask>0] = out_unf
|
||||
return out
|
||||
|
||||
def forward_(self, input, selh=-torch.ones(1,1), selw=-torch.ones(1,1), mask=-torch.ones(1,1),stoch=True,stride=-1):
|
||||
device=input.device
|
||||
if stride==-1:
|
||||
stride = self.stride
|
||||
#stoch=True
|
||||
if stoch==False:
|
||||
stride=1 #test with real average pooling
|
||||
batch_size, in_channels, in_h, in_w = input.shape
|
||||
out_channels, in_channels, kh, kw = self.weight.shape
|
||||
|
||||
afterconv_h = in_h+2*self.padding-(kh-1) #size after conv
|
||||
afterconv_w = in_w+2*self.padding-(kw-1)
|
||||
if self.ceil_mode:
|
||||
out_h = math.ceil(afterconv_h/stride)
|
||||
out_w = math.ceil(afterconv_w/stride)
|
||||
else:
|
||||
out_h = math.floor(afterconv_h/stride)
|
||||
out_w = math.floor(afterconv_w/stride)
|
||||
unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=self.dilation, padding=self.padding, stride=1)
|
||||
inp_unf = unfold(input)
|
||||
if stride!=1:
|
||||
inp_unf = inp_unf.view(batch_size,in_channels,kh*kw,afterconv_h,afterconv_w)
|
||||
if selh[0,0]==-1:
|
||||
resth = (out_h*stride)-afterconv_h
|
||||
restw = (out_w*stride)-afterconv_w
|
||||
selh = torch.randint(stride,(in_channels,out_h,out_w), device=device)
|
||||
selw = torch.randint(stride,(in_channels,out_h,out_w), device=device)
|
||||
# print(selh.shape)
|
||||
if resth!=0:
|
||||
# Case: what if (stride-resth)==0 ?
|
||||
selh[-1,:]=selh[-1,:]%(stride-resth);selh[:,-1]=selh[:,-1]%(stride-restw)
|
||||
selw[-1,:]=selw[-1,:]%(stride-resth);selw[:,-1]=selw[:,-1]%(stride-restw)
|
||||
rng_h = selh + torch.arange(0,out_h*stride,stride,device=device).view(1,-1,1)
|
||||
rng_w = selw + torch.arange(0,out_w*stride,stride,device=device).view(1,1,-1)
|
||||
selc = torch.arange(0,in_channels,device=input.device).view(in_channels,1,1).repeat(1,out_h,out_w)
|
||||
|
||||
if mask[0,0]==-1:
|
||||
inp_unf = inp_unf.transpose(1,2)[:,:,selc,rng_h,rng_w].transpose(2,1).reshape(batch_size,in_channels*kh*kw,-1)
|
||||
else:
|
||||
inp_unf = inp_unf[:,:,rng_h[mask>0],rng_w[mask>0]]
|
||||
|
||||
#Matrix mul
|
||||
if self.bias is None:
|
||||
out_unf = inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2)
|
||||
else:
|
||||
out_unf = (inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()) + self.bias).transpose(1, 2)
|
||||
|
||||
if stride==1 or mask[0,0]==-1:
|
||||
out = out_unf.view(batch_size,out_channels,out_h,out_w) #Fold
|
||||
# if stoch==False:
|
||||
# out = F.avg_pool2d(out,self.stride,ceil_mode=True)
|
||||
else:
|
||||
out = torch.zeros(batch_size, out_channels,out_h,out_w,device=device)
|
||||
out[:,:,mask>0] = out_unf
|
||||
return out
|
||||
|
||||
|
||||
def comp(self,h,w,mask=-torch.ones(1,1)):
|
||||
out_h = (h-(self.kernel_size))/self.stride
|
||||
out_w = (w-(self.kernel_size))/self.stride
|
||||
if self.ceil_mode:
|
||||
out_h = math.ceil(out_h)
|
||||
out_w = math.ceil(out_w)
|
||||
else:
|
||||
out_h = math.floor(out_h)
|
||||
out_w = math.floor(out_w)
|
||||
if mask[0,0]==-1:
|
||||
comp = self.weight.numel()*out_h*out_w
|
||||
else:
|
||||
comp = self.weight.numel()*(mask>0).sum()
|
||||
return comp
|
||||
|
||||
def sample(self,h,w,mask):
|
||||
'''
|
||||
h, w : forward input shape
|
||||
mask : mask of output used in computation
|
||||
'''
|
||||
stride = self.stride
|
||||
out_channels, in_channels, kh, kw = self.weight.shape
|
||||
device=mask.device
|
||||
|
||||
afterconv_h = h-(kh-1) # Dim after deconv (or after conv in the forward pass)
|
||||
afterconv_w = w-(kw-1)
|
||||
print(afterconv_h/stride)
|
||||
if self.ceil_mode:
|
||||
out_h = math.ceil(afterconv_h/stride)
|
||||
out_w = math.ceil(afterconv_w/stride)
|
||||
else:
|
||||
out_h = math.floor(afterconv_h/stride)
|
||||
out_w = math.floor(afterconv_w/stride)
|
||||
# out_h=((afterconv_h+2*self.padding-1)/stride)+1
|
||||
# out_w=((afterconv_w+2*self.padding-1)/stride)+1
|
||||
print('Out',out_h, out_w)
|
||||
assert(tuple(mask.shape)==(out_h,out_w))
|
||||
# out_h,out_w=mask.shape
|
||||
|
||||
selh = torch.randint(stride,(out_h,out_w), device=device)
|
||||
selw = torch.randint(stride,(out_h,out_w), device=device)
|
||||
|
||||
resth = (out_h*stride)-afterconv_h #remainder from ceil/floor, 0 or 1
|
||||
restw = (out_w*stride)-afterconv_w
|
||||
print('rest', resth, restw)
|
||||
if resth!=0:
|
||||
selh[-1,:]=selh[-1,:]%(stride-resth);selh[:,-1]=selh[:,-1]%(stride-restw)
|
||||
selw[-1,:]=selw[-1,:]%(stride-resth);selw[:,-1]=selw[:,-1]%(stride-restw)
|
||||
maskh = (out_h)*stride
|
||||
maskw = (out_w)*stride
|
||||
print('mask', maskh, maskw)
|
||||
rng_h = selh + torch.arange(0,out_h*stride,stride,device=device).view(-1,1)
|
||||
rng_w = selw + torch.arange(0,out_w*stride,stride,device=device)
|
||||
# rng_w = selw + torch.arange(0,out_w*self.stride,self.stride,device=device).view(-1,1)
|
||||
nmask = torch.zeros((maskh,maskw),device=device)
|
||||
nmask[rng_h,rng_w] = 1
|
||||
#rmask = mask * nmask
|
||||
dmask = self.pooldeconv(mask.float().view(1,1,mask.shape[0],mask.shape[1]))
|
||||
rmask = nmask * dmask
|
||||
#rmask = rmask[:,:,:out_h,:out_w]
|
||||
# print('rmask', rmask.shape)
|
||||
fmask = self.deconv(rmask)
|
||||
# print('fmask', fmask.shape)
|
||||
fmask = fmask[0,0]
|
||||
return selh,selw,fmask.long()
|
||||
|
||||
def get_size(self,h,w):
|
||||
newh=math.floor(((h + 2*self.padding - self.dilation*(self.kernel_size-1) - 1)/self.stride) + 1)
|
||||
neww=math.floor(((w + 2*self.padding - self.dilation*(self.kernel_size-1) - 1)/self.stride) + 1)
|
||||
return newh, neww
|
||||
|
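SConv2dAvg implements a strided convolution as unfold + matmul in which each output location is computed from one randomly chosen offset inside its stride x stride pooling region rather than from their average; with stoch=False it falls back to a dense stride-1 convolution so a true average pool can be applied outside, and sample() converts a mask over the output grid into the selection indices plus the mask required on the input side (the bottom-up mechanism used by the networks above). A minimal standalone sketch (shapes assume a 32x32 input; sample() still prints a few debug values):

import torch
import torch.nn.functional as F
from models.stoch import SConv2dAvg  # assumed import path

conv = SConv2dAvg(3, 8, kernel_size=3, stride=2)
x = torch.randn(4, 3, 32, 32)

out_stoch = conv(x)                                                 # (4, 8, 15, 15), one sample per 2x2 region
out_dense = F.avg_pool2d(conv(x, stoch=False), 2, ceil_mode=True)   # true average, same output shape

mask = torch.ones(15, 15)                        # keep every output location
selh, selw, in_mask = conv.sample(32, 32, mask)  # selection + mask for the layer below
out_masked = conv(x, selh, selw, mask)           # computes only the masked locations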
147
models/stochsim.py
Normal file
|
@@ -0,0 +1,147 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
import math
|
||||
|
||||
# spatial batch and channel
|
||||
def savg_pool2d_sbc(x,size,ceil_mode=False):
|
||||
b,c,h,w = x.shape
|
||||
device = x.device
|
||||
if ceil_mode:
|
||||
out_h = math.ceil(h/size)
|
||||
out_w = math.ceil(w/size)
|
||||
else:
|
||||
out_h = math.floor(h/size)
|
||||
out_w = math.floor(w/size)
|
||||
selh = torch.randint(size,(b,c,out_h,out_w), device=device)
|
||||
#selh[:] = 0
|
||||
rngh = torch.arange(0,h,size,device=x.device).view(1,1,-1,1)
|
||||
selh = selh+rngh
|
||||
|
||||
selw = torch.randint(size,(b,c,out_h,out_w), device=device)
|
||||
#selw[:] = 0
|
||||
rngw = torch.arange(0,w,size,device=x.device).view(1,1,1,-1)
|
||||
selw = selw+rngw
|
||||
selc = torch.arange(0,c,device=x.device).view(1,c,1,1).repeat(b,1,out_h,out_w)
|
||||
selb = torch.arange(0,b,device=x.device).view(b,1,1,1).repeat(1,c,out_h,out_w)
|
||||
newx = x[selb,selc,selh, selw]
|
||||
return newx
|
||||
|
||||
#spatial and channel, same for all batch
|
||||
def savg_pool2d_sc(x,size,ceil_mode=False):
|
||||
b,c,h,w = x.shape
|
||||
device = x.device
|
||||
if ceil_mode:
|
||||
out_h = math.ceil(h/size)
|
||||
out_w = math.ceil(w/size)
|
||||
else:
|
||||
out_h = math.floor(h/size)
|
||||
out_w = math.floor(w/size)
|
||||
selh = torch.randint(size,(c,out_h,out_w), device=device)
|
||||
#selh[:] = 0
|
||||
rngh = torch.arange(0,h,size,device=x.device).view(1,-1,1)
|
||||
selh = selh+rngh
|
||||
|
||||
selw = torch.randint(size,(c,out_h,out_w), device=device)
|
||||
#selw[:] = 0
|
||||
rngw = torch.arange(0,w,size,device=x.device).view(1,1,-1)
|
||||
selw = selw+rngw
|
||||
selc = torch.arange(0,c,device=x.device).view(c,1,1).repeat(1,out_h,out_w)
|
||||
|
||||
newx = x[:,selc,selh, selw]
|
||||
return newx
|
||||
|
||||
#spatial and batch, same for all channels
|
||||
def savg_pool2d_sb(x,size,ceil_mode=False):
|
||||
b,c,h,w = x.shape
|
||||
device = x.device
|
||||
if ceil_mode:
|
||||
out_h = math.ceil(h/size)
|
||||
out_w = math.ceil(w/size)
|
||||
else:
|
||||
out_h = math.floor(h/size)
|
||||
out_w = math.floor(w/size)
|
||||
selh = torch.randint(size,(b,out_h,out_w), device=device)
|
||||
#selh[:] = 0
|
||||
rngh = torch.arange(0,h,size,device=x.device).view(1,-1,1)
|
||||
selh = selh+rngh
|
||||
|
||||
selw = torch.randint(size,(b,out_h,out_w), device=device)
|
||||
#selw[:] = 0
|
||||
rngw = torch.arange(0,w,size,device=x.device).view(1,1,-1)
|
||||
selw = selw+rngw
|
||||
selb = torch.arange(0,b,device=x.device).view(b,1,1).repeat(1,out_h,out_w)
|
||||
|
||||
newx = x.transpose(1,0)
|
||||
newx = newx[:,selb,selh, selw]
|
||||
return newx.transpose(1,0)
|
||||
|
||||
#spatial stochasticity, same for all batch and channels
|
||||
def savg_pool2d_s(x,size,ceil_mode=False):
|
||||
b,c,h,w = x.shape
|
||||
device = x.device
|
||||
if ceil_mode:
|
||||
out_h = math.ceil(h/size)
|
||||
out_w = math.ceil(w/size)
|
||||
else:
|
||||
out_h = math.floor(h/size)
|
||||
out_w = math.floor(w/size)
|
||||
selh = torch.randint(size,(out_h,out_w), device=device)
|
||||
#selh[:] = 0
|
||||
rngh = torch.arange(0,h,size,device=x.device).view(-1,1)
|
||||
selh = selh+rngh
|
||||
|
||||
selw = torch.randint(size,(out_h,out_w), device=device)
|
||||
#selw[:] = 0
|
||||
rngw = torch.arange(0,w,size,device=x.device)
|
||||
selw = selw+rngw
|
||||
|
||||
newx = x[:,:, selh, selw]
|
||||
return newx
|
||||
|
||||
def savg_pool2d_sdrop(x,size,ceil_mode=False,drop=0,repeat=1):
|
||||
b,c,h,w = x.shape
|
||||
device = x.device
|
||||
if ceil_mode:
|
||||
out_h = math.ceil(h/size)
|
||||
out_w = math.ceil(w/size)
|
||||
else:
|
||||
out_h = math.floor(h/size)
|
||||
out_w = math.floor(w/size)
|
||||
|
||||
for l in range(repeat):
|
||||
selh = torch.randint(size,(out_h,out_w), device=device)
|
||||
rngh = torch.arange(0,h,size,device=x.device).view(-1,1)
|
||||
selh = selh+rngh
|
||||
|
||||
selw = torch.randint(size,(out_h,out_w), device=device)
|
||||
rngw = torch.arange(0,w,size,device=x.device)
|
||||
selw = selw+rngw
|
||||
|
||||
if l==0:
|
||||
newx = x[:,:, selh, selw]
|
||||
else:
|
||||
newx = newx + x[:,:, selh, selw]
|
||||
newx = newx/repeat
|
||||
if drop!=0:
|
||||
dropmask = torch.rand((c), device=device)
|
||||
newx[:,dropmask<drop] = 0
|
||||
return newx
|
||||
|
||||
|
||||
def savg_pool2d(x, stride,mode,ceil_mode=False,repeat=1):
|
||||
if mode=='s':
|
||||
out = savg_pool2d_s(x,stride,ceil_mode=ceil_mode)
|
||||
if mode=='sdrop':
|
||||
out = savg_pool2d_sdrop(x,stride,ceil_mode=ceil_mode,repeat=repeat)
|
||||
elif mode =='sb':
|
||||
out = savg_pool2d_sb(x,stride,ceil_mode=ceil_mode)
|
||||
elif mode =='sc':
|
||||
out = savg_pool2d_sc(x,stride,ceil_mode=ceil_mode)
|
||||
elif mode =='sbc':
|
||||
out = savg_pool2d_sbc(x,stride,ceil_mode=ceil_mode)
|
||||
return out
|
||||
|
||||
|
||||
|
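savg_pool2d is a drop-in stochastic replacement for F.avg_pool2d: instead of averaging each size x size window it keeps one randomly selected element, and the mode string controls how the selection is shared: 's' uses one selection for the whole batch, 'sb' draws per batch element, 'sc' per channel, 'sbc' per element and channel, and 'sdrop' averages several draws and optionally zeroes random channels. A minimal sketch (even spatial sizes; with ceil_mode=True and an odd size the current indexing can step past the border):

import torch
from models.stochsim import savg_pool2d  # assumed import path

x = torch.randn(4, 8, 32, 32)
y_shared = savg_pool2d(x, 2, mode='s')     # (4, 8, 16, 16), same selection for all N and C
y_percell = savg_pool2d(x, 2, mode='sbc')  # (4, 8, 16, 16), independent selection per N and C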
124
utils.py
Normal file
|
@@ -0,0 +1,124 @@
|
|||
'''Some helper functions for PyTorch, including:
|
||||
- get_mean_and_std: calculate the mean and std value of dataset.
|
||||
- msr_init: net parameter initialization.
|
||||
- progress_bar: progress bar mimic xlua.progress.
|
||||
'''
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import math
|
||||
|
||||
import torch.nn as nn
|
||||
import torch.nn.init as init
|
||||
|
||||
|
||||
def get_mean_and_std(dataset):
|
||||
'''Compute the mean and std value of dataset.'''
|
||||
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)
|
||||
mean = torch.zeros(3)
|
||||
std = torch.zeros(3)
|
||||
print('==> Computing mean and std..')
|
||||
for inputs, targets in dataloader:
|
||||
for i in range(3):
|
||||
mean[i] += inputs[:,i,:,:].mean()
|
||||
std[i] += inputs[:,i,:,:].std()
|
||||
mean.div_(len(dataset))
|
||||
std.div_(len(dataset))
|
||||
return mean, std
|
||||
|
||||
def init_params(net):
|
||||
'''Init layer parameters.'''
|
||||
for m in net.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
init.kaiming_normal(m.weight, mode='fan_out')
|
||||
if m.bias is not None:
|
||||
init.constant(m.bias, 0)
|
||||
elif isinstance(m, nn.BatchNorm2d):
|
||||
init.constant(m.weight, 1)
|
||||
init.constant(m.bias, 0)
|
||||
elif isinstance(m, nn.Linear):
|
||||
init.normal(m.weight, std=1e-3)
|
||||
if m.bias is not None:
|
||||
init.constant(m.bias, 0)
|
||||
|
||||
|
||||
_, term_width = os.popen('stty size', 'r').read().split()
|
||||
term_width = int(term_width)
|
||||
|
||||
TOTAL_BAR_LENGTH = 65.
|
||||
last_time = time.time()
|
||||
begin_time = last_time
|
||||
def progress_bar(current, total, msg=None):
|
||||
global last_time, begin_time
|
||||
if current == 0:
|
||||
begin_time = time.time() # Reset for new bar.
|
||||
|
||||
cur_len = int(TOTAL_BAR_LENGTH*current/total)
|
||||
rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1
|
||||
|
||||
sys.stdout.write(' [')
|
||||
for i in range(cur_len):
|
||||
sys.stdout.write('=')
|
||||
sys.stdout.write('>')
|
||||
for i in range(rest_len):
|
||||
sys.stdout.write('.')
|
||||
sys.stdout.write(']')
|
||||
|
||||
cur_time = time.time()
|
||||
step_time = cur_time - last_time
|
||||
last_time = cur_time
|
||||
tot_time = cur_time - begin_time
|
||||
|
||||
L = []
|
||||
L.append(' Step: %s' % format_time(step_time))
|
||||
L.append(' | Tot: %s' % format_time(tot_time))
|
||||
if msg:
|
||||
L.append(' | ' + msg)
|
||||
|
||||
msg = ''.join(L)
|
||||
sys.stdout.write(msg)
|
||||
for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
|
||||
sys.stdout.write(' ')
|
||||
|
||||
# Go back to the center of the bar.
|
||||
for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
|
||||
sys.stdout.write('\b')
|
||||
sys.stdout.write(' %d/%d ' % (current+1, total))
|
||||
|
||||
if current < total-1:
|
||||
sys.stdout.write('\r')
|
||||
else:
|
||||
sys.stdout.write('\n')
|
||||
sys.stdout.flush()
|
||||
|
||||
def format_time(seconds):
|
||||
days = int(seconds / 3600/24)
|
||||
seconds = seconds - days*3600*24
|
||||
hours = int(seconds / 3600)
|
||||
seconds = seconds - hours*3600
|
||||
minutes = int(seconds / 60)
|
||||
seconds = seconds - minutes*60
|
||||
secondsf = int(seconds)
|
||||
seconds = seconds - secondsf
|
||||
millis = int(seconds*1000)
|
||||
|
||||
f = ''
|
||||
i = 1
|
||||
if days > 0:
|
||||
f += str(days) + 'D'
|
||||
i += 1
|
||||
if hours > 0 and i <= 2:
|
||||
f += str(hours) + 'h'
|
||||
i += 1
|
||||
if minutes > 0 and i <= 2:
|
||||
f += str(minutes) + 'm'
|
||||
i += 1
|
||||
if secondsf > 0 and i <= 2:
|
||||
f += str(secondsf) + 's'
|
||||
i += 1
|
||||
if millis > 0 and i <= 2:
|
||||
f += str(millis) + 'ms'
|
||||
i += 1
|
||||
if f == '':
|
||||
f = '0ms'
|
||||
return f
|