From 3de923156c6a3722b1c3fe36809ec720a3248c3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Harl=C3=A9?= Date: Fri, 12 Jun 2020 01:42:08 -0700 Subject: [PATCH] Initial commit --- LICENSE | 21 +++ README.md | 34 +++- jobs/test.sh | 26 +++ main.py | 351 ++++++++++++++++++++++++++++++++++++ models/Old/densenet.py | 107 +++++++++++ models/Old/dpn.py | 98 ++++++++++ models/Old/efficientnet.py | 175 ++++++++++++++++++ models/Old/googlenet.py | 107 +++++++++++ models/Old/lenet.py | 23 +++ models/Old/mobilenet.py | 61 +++++++ models/Old/mobilenetv2.py | 86 +++++++++ models/Old/mylenet.py | 71 ++++++++ models/Old/mylenet2.py | 123 +++++++++++++ models/Old/mylenet3.py | 238 ++++++++++++++++++++++++ models/Old/myresnet.py | 159 ++++++++++++++++ models/Old/myresnet2.py | 187 +++++++++++++++++++ models/Old/pnasnet.py | 125 +++++++++++++ models/Old/preact_resnet.py | 118 ++++++++++++ models/Old/regnet.py | 155 ++++++++++++++++ models/Old/resnet.py | 132 ++++++++++++++ models/Old/resnext.py | 95 ++++++++++ models/Old/sconv2davg.py | 140 ++++++++++++++ models/Old/senet.py | 121 +++++++++++++ models/Old/shufflenet.py | 109 +++++++++++ models/Old/shufflenetv2.py | 162 +++++++++++++++++ models/Old/vgg.py | 47 +++++ models/__init__.py | 2 + models/mylenet4.py | 314 ++++++++++++++++++++++++++++++++ models/myresnet3.py | 167 +++++++++++++++++ models/stoch.py | 230 +++++++++++++++++++++++ models/stochsim.py | 147 +++++++++++++++ utils.py | 124 +++++++++++++ 32 files changed, 4054 insertions(+), 1 deletion(-) create mode 100644 LICENSE create mode 100644 jobs/test.sh create mode 100644 main.py create mode 100644 models/Old/densenet.py create mode 100644 models/Old/dpn.py create mode 100644 models/Old/efficientnet.py create mode 100644 models/Old/googlenet.py create mode 100644 models/Old/lenet.py create mode 100644 models/Old/mobilenet.py create mode 100644 models/Old/mobilenetv2.py create mode 100644 models/Old/mylenet.py create mode 100644 models/Old/mylenet2.py create mode 100644 models/Old/mylenet3.py create mode 100644 models/Old/myresnet.py create mode 100644 models/Old/myresnet2.py create mode 100644 models/Old/pnasnet.py create mode 100644 models/Old/preact_resnet.py create mode 100644 models/Old/regnet.py create mode 100644 models/Old/resnet.py create mode 100644 models/Old/resnext.py create mode 100644 models/Old/sconv2davg.py create mode 100644 models/Old/senet.py create mode 100644 models/Old/shufflenet.py create mode 100644 models/Old/shufflenetv2.py create mode 100644 models/Old/vgg.py create mode 100644 models/__init__.py create mode 100644 models/mylenet4.py create mode 100644 models/myresnet3.py create mode 100644 models/stoch.py create mode 100644 models/stochsim.py create mode 100644 utils.py diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2e229fa --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 liukuang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index af28279..d7bbe9f 100644 --- a/README.md +++ b/README.md @@ -1 +1,33 @@ -# BU_Stoch_pool \ No newline at end of file +# BU_Stoch_pool + +# Train CIFAR10 with PyTorch + +I'm playing with [PyTorch](http://pytorch.org/) on the CIFAR10 dataset. + +## Prerequisites +- Python 3.6+ +- PyTorch 1.0+ + +## Accuracy +| Model | Acc. | +| ----------------- | ----------- | +| [VGG16](https://arxiv.org/abs/1409.1556) | 92.64% | +| [ResNet18](https://arxiv.org/abs/1512.03385) | 93.02% | +| [ResNet50](https://arxiv.org/abs/1512.03385) | 93.62% | +| [ResNet101](https://arxiv.org/abs/1512.03385) | 93.75% | +| [RegNetX_200MF](https://arxiv.org/abs/2003.13678) | 94.24% | +| [RegNetY_400MF](https://arxiv.org/abs/2003.13678) | 94.29% | +| [MobileNetV2](https://arxiv.org/abs/1801.04381) | 94.43% | +| [ResNeXt29(32x4d)](https://arxiv.org/abs/1611.05431) | 94.73% | +| [ResNeXt29(2x64d)](https://arxiv.org/abs/1611.05431) | 94.82% | +| [DenseNet121](https://arxiv.org/abs/1608.06993) | 95.04% | +| [PreActResNet18](https://arxiv.org/abs/1603.05027) | 95.11% | +| [DPN92](https://arxiv.org/abs/1707.01629) | 95.16% | + +## Learning rate adjustment +I manually change the `lr` during training: +- `0.1` for epoch `[0,150)` +- `0.01` for epoch `[150,250)` +- `0.001` for epoch `[250,350)` + +Resume the training with `python main.py --resume --lr=0.01` \ No newline at end of file diff --git a/jobs/test.sh b/jobs/test.sh new file mode 100644 index 0000000..6538687 --- /dev/null +++ b/jobs/test.sh @@ -0,0 +1,26 @@ +#!/bin/bash +#SBATCH --gres=gpu:1 #gpu:v100l:1 # https://docs.computecanada.ca/wiki/Using_GPUs_with_Slurm +#SBATCH --cpus-per-task=6 #6 # Cores proportional to GPUs: 6 on Cedar, 16 on Graham. +#SBATCH --mem=32000M #32000M # Memory proportional to CPUs: 32000 Cedar, 64000 Graham. 
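+# Optional sanity check of what Slurm actually granted (an assumption:
+# standard Slurm/CUDA environment variables; safe to leave commented out):
+# echo "GPUs: $CUDA_VISIBLE_DEVICES  CPUs: $SLURM_CPUS_PER_TASK"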
+#SBATCH --account=def-mpederso
+#SBATCH --time=1:00:00
+#SBATCH --job-name=MyResNet18
+#SBATCH --output=log/%x-%j.out
+#SBATCH --mail-user=harle.collette.antoine@gmail.com
+#SBATCH --mail-type=END
+#SBATCH --mail-type=FAIL
+
+
+# Setup
+source ~/dataug/bin/activate
+
+# Execute
+# echo $(pwd) = /home/antoh/projects/def-mpederso/antoh/stoch/jobs
+cd ../
+
+time python main.py \
+    -n MyResNet18 \
+    -ep 10 \
+    -sc cosine \
+    -lr 5e-2 \
+    -pf _noCrop_Stoch
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..dd25b28
--- /dev/null
+++ b/main.py
@@ -0,0 +1,351 @@
+'''Train CIFAR10 with PyTorch.'''
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.nn.functional as F
+import torch.backends.cudnn as cudnn
+
+import torchvision
+import torchvision.transforms as transforms
+
+import os
+import sys
+import time
+import argparse
+
+from models import *
+# from utils import progress_bar
+
+
+parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
+parser.add_argument('-lr', default=0.1, type=float, help='learning rate')
+parser.add_argument('--batch', default=128, type=int, help='batch_size')
+parser.add_argument('--epochs', '-ep', default=10, type=int, help='epochs')
+parser.add_argument('--scheduler', '-sc', dest='scheduler', default='',
+                    help='cosine/multiStep/exponential')
+parser.add_argument('--warmup_mul', '-wm', dest='warmup_mul', type=float, default=0, #2 # larger batch_size => larger multiplier
+                    help='Warmup multiplier')
+parser.add_argument('--warmup_ep', '-we', dest='warmup_ep', type=int, default=5,
+                    help='Warmup epochs')
+parser.add_argument('--resume', '-r', action='store_true',
+                    help='resume from checkpoint')
+parser.add_argument('--stoch', '-s', action='store_true',
+                    help='use stochastic pooling')
+parser.add_argument('--network', '-n', dest='net', default='MyLeNetNormal',
+                    help='Network')
+parser.add_argument('--res_folder', '-rf', dest='res_folder', default='res/',
+                    help='Results destination')
+parser.add_argument('--postfix', '-pf', dest='postfix', default='',
+                    help='Results postfix')
+parser.add_argument('--dataset', '-d', dest='dataset', default='CIFAR10',
+                    help='Dataset')
+args = parser.parse_args()
+print(args)
+
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+best_acc = 0  # best test accuracy
+start_epoch = 0  # start from epoch 0 or last checkpoint epoch
+checkpoint=False
+
+# Data
+print('==> Preparing data..')
+dataroot="~/scratch/data"
+download_data=False
+# Dataset-specific Normalize transforms are appended per dataset below
+# (keeping them out of the base lists avoids normalizing twice).
+transform_train = [
+    # transforms.RandomCrop(32, padding=4),
+    transforms.RandomHorizontalFlip(),
+    transforms.ToTensor(),
+]
+
+transform_test = [
+    transforms.ToTensor(),
+]
+
+# trainset = torchvision.datasets.CIFAR10(
+#     root=dataroot, train=True, download=True, transform=transform_train)
+# trainloader = torch.utils.data.DataLoader(
+#     trainset, batch_size=args.batch, shuffle=True, num_workers=2)
+
+# testset = torchvision.datasets.CIFAR10(
+#     root=dataroot, train=False, download=True, transform=transform_test)
+# testloader = torch.utils.data.DataLoader(
+#     testset, batch_size=args.batch, shuffle=False, num_workers=2)
+
+# classes = ('plane', 'car', 'bird', 'cat', 'deer',
+#            'dog', 'frog', 'horse', 'ship', 'truck')
+
+if args.dataset == 'CIFAR10': #(32x32 RGB)
+    transform_train=transform_train+[transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]
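+    # ^ CIFAR10 mean/std; the TinyImageNet branch appends ImageNet stats
+    # instead, which is why Normalize is added per dataset rather than in
+    # the base transform lists above.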
+    transform_test=transform_test+[transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]
+
+    trainset = torchvision.datasets.CIFAR10(dataroot, train=True, download=download_data, transform=transforms.Compose(transform_train))
+    # data_val = torchvision.datasets.CIFAR10(dataroot, train=True, download=download_data, transform=transforms.Compose(transform))
+    testset = torchvision.datasets.CIFAR10(dataroot, train=False, download=download_data, transform=transforms.Compose(transform_test))
+elif args.dataset == 'TinyImageNet': #(Train:100k, Val:5k, Test:5k) (64x64 RGB)
+    transform_train=transform_train+[transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]
+    transform_test=transform_test+[transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]
+
+    trainset = torchvision.datasets.ImageFolder(os.path.join(dataroot, 'tiny-imagenet-200/train'), transform=transforms.Compose(transform_train))
+    # data_val = torchvision.datasets.ImageFolder(os.path.join(dataroot, 'tiny-imagenet-200/val'), transform=transforms.Compose(transform))
+    testset = torchvision.datasets.ImageFolder(os.path.join(dataroot, 'tiny-imagenet-200/test'), transform=transforms.Compose(transform_test))
+else:
+    raise Exception('Unknown dataset')
+
+trainloader = torch.utils.data.DataLoader(
+    trainset, batch_size=args.batch, shuffle=True, num_workers=2)
+testloader = torch.utils.data.DataLoader(
+    testset, batch_size=args.batch, shuffle=False, num_workers=2)
+
+# Model
+print('==> Building model..')
+#normal cuda convolution
+# net = MyLeNetNormal() #11.3s - 49.4% #2.3GB
+
+#strided convolutions instead of pooling
+#net = MyLeNetStride() #5.7s - 41.45% (5 epochs) #0.86GB
+
+#convolution with matrices unfold
+#net = MyLeNetMatNormal() #19.6s - 41.3% #1.7GB
+
+#stochastic like fig.2 paper
+#net = MyLeNetMatStoch() # 16.8s - 41.3% #1.8GB
+
+#stochastic Bottom-Up like fig.3 paper
+# net = MyLeNetMatStochBU() # 10.5s - 45.3% #1.3GB
+
+net=globals()[args.net]()
+print(net)
+net = net.to(device)
+if device == 'cuda':
+    net = torch.nn.DataParallel(net)
+    cudnn.benchmark = True
+
+log = []
+if args.resume:
+    # Load checkpoint.
+    print('==> Resuming from checkpoint..')
+    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
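+    # Assumes the checkpoint is loaded on the same device type it was saved
+    # from; loading a CUDA checkpoint on a CPU-only node would additionally
+    # need torch.load(..., map_location=device).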
+    checkpoint = torch.load('./checkpoint/ckpt.pth')
+    net.load_state_dict(checkpoint['net'])
+    best_acc = checkpoint['acc']
+    start_epoch = checkpoint['epoch']
+
+    print('WARNING: log & LR-scheduler state is not resumed')
+
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.SGD(net.parameters(), lr=args.lr,
+                      momentum=0.9, weight_decay=5e-4)
+
+# Training
+max_grad = 1 #Max gradient norm #Limits catastrophic drops
+def train(epoch):
+    net.train()
+    train_loss = 0
+    correct = 0
+    total = 0
+    for batch_idx, (inputs, targets) in enumerate(trainloader):
+        inputs, targets = inputs.to(device), targets.to(device)
+        optimizer.zero_grad()
+        outputs = net(inputs)
+        loss = criterion(outputs, targets)
+        loss.backward()
+        torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=max_grad, norm_type=2) #Prevent exploding gradients
+        optimizer.step()
+
+        #Log
+        train_loss += loss.item()
+        _, predicted = outputs.max(1)
+        total += targets.size(0)
+        correct += predicted.eq(targets).sum().item()
+
+        # progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
+        #              % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
+
+    # if args.net in {'MyLeNetMatNormal', 'MyLeNetMatStoch', 'MyLeNetMatStochBU'}:
+    #     print('Comp',net.comp)
+    return train_loss/(batch_idx+1), 100.*correct/total
+
+#deterministic test
+def test(epoch):
+    global best_acc
+    net.eval()
+    test_loss = 0
+    correct = 0
+    total = 0
+    with torch.no_grad():
+        for batch_idx, (inputs, targets) in enumerate(testloader):
+            inputs, targets = inputs.to(device), targets.to(device)
+            outputs = net(inputs,stoch=False)
+            loss = criterion(outputs, targets)
+
+            test_loss += loss.item()
+            _, predicted = outputs.max(1)
+            total += targets.size(0)
+            correct += predicted.eq(targets).sum().item()
+
+            # progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
+            #              % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
+
+    # Save checkpoint.
+    acc = 100.*correct/total
+    if acc > best_acc:
+        if checkpoint:
+            print('Saving..')
+            state = {
+                'net': net.state_dict(),
+                'acc': acc,
+                'epoch': epoch,
+            }
+            if not os.path.isdir('checkpoint'):
+                os.mkdir('checkpoint')
+            torch.save(state, './checkpoint/ckpt.pth')
+        best_acc = acc
+
+    return test_loss/(batch_idx+1), acc
+
+#Stochastic test: average the logits of several stochastic forward passes
+def stest(epoch,times=10):
+    global best_acc
+    net.eval()
+    test_loss = 0
+    correct = 0
+    total = 0
+    with torch.no_grad():
+        for batch_idx, (inputs, targets) in enumerate(testloader):
+            inputs, targets = inputs.to(device), targets.to(device)
+            out = torch.zeros(times,inputs.shape[0],10, device=device) #10 = number of classes
+            for l in range(times):
+                out[l] = net(inputs,stoch=True)
+            outputs = out.mean(0)
+            loss = criterion(outputs, targets)
+
+            test_loss += loss.item()
+            _, predicted = outputs.max(1)
+            total += targets.size(0)
+            correct += predicted.eq(targets).sum().item()
+
+            # progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
+            #              % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
+
+    # Save checkpoint.
+    acc = 100.*correct/total
+    if acc > best_acc:
+        print('Saving..')
+        state = {
+            'net': net.state_dict(),
+            'acc': acc,
+            'epoch': epoch,
+        }
+        if not os.path.isdir('checkpoint'):
+            os.mkdir('checkpoint')
+        torch.save(state, './checkpoint/ckpt.pth')
+        best_acc = acc
+
+    return test_loss/(batch_idx+1), acc
+
+import matplotlib.pyplot as plt
+def plot_res(log, fig_name='res'):
+    """Save a visual graph of the logs.
+
+        Args:
+            log (dict): Logs of the training generated by most of train_utils.
+            fig_name (string): Relative path where to save the graph.
(default: res) + """ + epochs = [x["epoch"] for x in log] + + fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(30, 15)) + + ax[0].set_title('Loss') + ax[0].plot(epochs,[x["train_loss"] for x in log], label='Train') + ax[0].plot(epochs,[x["test_loss"] for x in log], label='Test') + ax[0].legend() + + ax[1].set_title('Acc') + ax[1].plot(epochs,[x["train_acc"] for x in log], label='Train') + ax[1].plot(epochs,[x["test_acc"] for x in log], label='Test') + ax[1].legend() + + + fig_name = fig_name.replace('.',',').replace(',,/','../') + plt.savefig(fig_name, bbox_inches='tight') + plt.close() + +from warmup_scheduler import GradualWarmupScheduler +def get_scheduler(schedule, epochs, warmup_mul, warmup_ep): + scheduler=None + if schedule=='cosine': + scheduler=torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs, eta_min=0.) + elif schedule=='multiStep': + #Multistep milestones inspired by AutoAugment + scheduler=torch.optim.lr_scheduler.MultiStepLR(optimizer, + milestones=[int(epochs/3), int(epochs*2/3), int(epochs*2.7/3)], + gamma=0.1) + elif schedule=='exponential': + scheduler=torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: (1 - epoch / epochs) ** 0.9) + elif not(schedule is None or schedule==''): + raise ValueError("Lr scheduler unknown : %s"%schedule) + + #Warmup + if warmup_mul>=1: + scheduler=GradualWarmupScheduler(optimizer, + multiplier=warmup_mul, + total_epoch=warmup_ep, + after_scheduler=scheduler) + + return scheduler + +### MAIN ### +print_freq=args.epochs/10 +res_folder=args.res_folder +filename = ("{}-{}epochs".format(args.net,start_epoch+args.epochs))+args.postfix +log = [] + +#Lr-Scheduler +scheduler=get_scheduler(args.scheduler, args.epochs, args.warmup_mul, args.warmup_ep) + +print('==> Training model..') +t0 = time.perf_counter() +for epoch in range(start_epoch, start_epoch+args.epochs): + + train_loss, train_acc = train(epoch) + test_loss, test_acc = test(epoch) + + if scheduler is not None: + scheduler.step() + + #### Log #### + log.append({ + "epoch": epoch, + "train_loss": train_loss, + "train_acc": train_acc, + "test_loss": test_loss, + "test_acc": test_acc, + }) + + ### Print ### + if(print_freq and epoch%print_freq==0): + print('-'*9) + print('\nEpoch: %d' % epoch) + print("Acc : %.2f / %.2f"%(train_acc, test_acc)) + print("Loss : %.2f / %.2f"%(train_loss, test_loss)) + +exec_time=time.perf_counter() - t0 +print('-'*9) +print('Best Acc : %.2f'%best_acc) +print('Training time (s):',exec_time) + + +import json +try: + with open(res_folder+"log/%s.json" % filename, "w+") as f: + json.dump(log, f, indent=True) + print('Log :\"',f.name, '\" saved !') +except: + print("Failed to save logs :",filename) + print(sys.exc_info()[1]) +try: + plot_res(log, fig_name=res_folder+filename) + print('Plot :\"',res_folder+filename, '\" saved !') +except: + print("Failed to plot res") + print(sys.exc_info()[1]) \ No newline at end of file diff --git a/models/Old/densenet.py b/models/Old/densenet.py new file mode 100644 index 0000000..47ebbbe --- /dev/null +++ b/models/Old/densenet.py @@ -0,0 +1,107 @@ +'''DenseNet in PyTorch.''' +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Bottleneck(nn.Module): + def __init__(self, in_planes, growth_rate): + super(Bottleneck, self).__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False) + self.bn2 = nn.BatchNorm2d(4*growth_rate) + self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, 
bias=False) + + def forward(self, x): + out = self.conv1(F.relu(self.bn1(x))) + out = self.conv2(F.relu(self.bn2(out))) + out = torch.cat([out,x], 1) + return out + + +class Transition(nn.Module): + def __init__(self, in_planes, out_planes): + super(Transition, self).__init__() + self.bn = nn.BatchNorm2d(in_planes) + self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False) + + def forward(self, x): + out = self.conv(F.relu(self.bn(x))) + out = F.avg_pool2d(out, 2) + return out + + +class DenseNet(nn.Module): + def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10): + super(DenseNet, self).__init__() + self.growth_rate = growth_rate + + num_planes = 2*growth_rate + self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False) + + self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0]) + num_planes += nblocks[0]*growth_rate + out_planes = int(math.floor(num_planes*reduction)) + self.trans1 = Transition(num_planes, out_planes) + num_planes = out_planes + + self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) + num_planes += nblocks[1]*growth_rate + out_planes = int(math.floor(num_planes*reduction)) + self.trans2 = Transition(num_planes, out_planes) + num_planes = out_planes + + self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) + num_planes += nblocks[2]*growth_rate + out_planes = int(math.floor(num_planes*reduction)) + self.trans3 = Transition(num_planes, out_planes) + num_planes = out_planes + + self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) + num_planes += nblocks[3]*growth_rate + + self.bn = nn.BatchNorm2d(num_planes) + self.linear = nn.Linear(num_planes, num_classes) + + def _make_dense_layers(self, block, in_planes, nblock): + layers = [] + for i in range(nblock): + layers.append(block(in_planes, self.growth_rate)) + in_planes += self.growth_rate + return nn.Sequential(*layers) + + def forward(self, x): + out = self.conv1(x) + out = self.trans1(self.dense1(out)) + out = self.trans2(self.dense2(out)) + out = self.trans3(self.dense3(out)) + out = self.dense4(out) + out = F.avg_pool2d(F.relu(self.bn(out)), 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + +def DenseNet121(): + return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32) + +def DenseNet169(): + return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32) + +def DenseNet201(): + return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32) + +def DenseNet161(): + return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48) + +def densenet_cifar(): + return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12) + +def test(): + net = densenet_cifar() + x = torch.randn(1,3,32,32) + y = net(x) + print(y) + +# test() diff --git a/models/Old/dpn.py b/models/Old/dpn.py new file mode 100644 index 0000000..d334367 --- /dev/null +++ b/models/Old/dpn.py @@ -0,0 +1,98 @@ +'''Dual Path Networks in PyTorch.''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Bottleneck(nn.Module): + def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer): + super(Bottleneck, self).__init__() + self.out_planes = out_planes + self.dense_depth = dense_depth + + self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False) + self.bn2 = nn.BatchNorm2d(in_planes) + self.conv3 = nn.Conv2d(in_planes, 
out_planes+dense_depth, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(out_planes+dense_depth) + + self.shortcut = nn.Sequential() + if first_layer: + self.shortcut = nn.Sequential( + nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(out_planes+dense_depth) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + x = self.shortcut(x) + d = self.out_planes + out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1) + out = F.relu(out) + return out + + +class DPN(nn.Module): + def __init__(self, cfg): + super(DPN, self).__init__() + in_planes, out_planes = cfg['in_planes'], cfg['out_planes'] + num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth'] + + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.last_planes = 64 + self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1) + self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2) + self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2) + self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2) + self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10) + + def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for i,stride in enumerate(strides): + layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0)) + self.last_planes = out_planes + (i+2) * dense_depth + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def DPN26(): + cfg = { + 'in_planes': (96,192,384,768), + 'out_planes': (256,512,1024,2048), + 'num_blocks': (2,2,2,2), + 'dense_depth': (16,32,24,128) + } + return DPN(cfg) + +def DPN92(): + cfg = { + 'in_planes': (96,192,384,768), + 'out_planes': (256,512,1024,2048), + 'num_blocks': (3,4,20,3), + 'dense_depth': (16,32,24,128) + } + return DPN(cfg) + + +def test(): + net = DPN92() + x = torch.randn(1,3,32,32) + y = net(x) + print(y) + +# test() diff --git a/models/Old/efficientnet.py b/models/Old/efficientnet.py new file mode 100644 index 0000000..53d8c7b --- /dev/null +++ b/models/Old/efficientnet.py @@ -0,0 +1,175 @@ +'''EfficientNet in PyTorch. + +Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks". 
+ +Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def swish(x): + return x * x.sigmoid() + + +def drop_connect(x, drop_ratio): + keep_ratio = 1.0 - drop_ratio + mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device) + mask.bernoulli_(keep_ratio) + x.div_(keep_ratio) + x.mul_(mask) + return x + + +class SE(nn.Module): + '''Squeeze-and-Excitation block with Swish.''' + + def __init__(self, in_channels, se_channels): + super(SE, self).__init__() + self.se1 = nn.Conv2d(in_channels, se_channels, + kernel_size=1, bias=True) + self.se2 = nn.Conv2d(se_channels, in_channels, + kernel_size=1, bias=True) + + def forward(self, x): + out = F.adaptive_avg_pool2d(x, (1, 1)) + out = swish(self.se1(out)) + out = self.se2(out).sigmoid() + out = x * out + return out + + +class Block(nn.Module): + '''expansion + depthwise + pointwise + squeeze-excitation''' + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + expand_ratio=1, + se_ratio=0., + drop_rate=0.): + super(Block, self).__init__() + self.stride = stride + self.drop_rate = drop_rate + self.expand_ratio = expand_ratio + + # Expansion + channels = expand_ratio * in_channels + self.conv1 = nn.Conv2d(in_channels, + channels, + kernel_size=1, + stride=1, + padding=0, + bias=False) + self.bn1 = nn.BatchNorm2d(channels) + + # Depthwise conv + self.conv2 = nn.Conv2d(channels, + channels, + kernel_size=kernel_size, + stride=stride, + padding=(1 if kernel_size == 3 else 2), + groups=channels, + bias=False) + self.bn2 = nn.BatchNorm2d(channels) + + # SE layers + se_channels = int(in_channels * se_ratio) + self.se = SE(channels, se_channels) + + # Output + self.conv3 = nn.Conv2d(channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, + bias=False) + self.bn3 = nn.BatchNorm2d(out_channels) + + # Skip connection if in and out shapes are the same (MV-V2 style) + self.has_skip = (stride == 1) and (in_channels == out_channels) + + def forward(self, x): + out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x))) + out = swish(self.bn2(self.conv2(out))) + out = self.se(out) + out = self.bn3(self.conv3(out)) + if self.has_skip: + if self.training and self.drop_rate > 0: + out = drop_connect(out, self.drop_rate) + out = out + x + return out + + +class EfficientNet(nn.Module): + def __init__(self, cfg, num_classes=10): + super(EfficientNet, self).__init__() + self.cfg = cfg + self.conv1 = nn.Conv2d(3, + 32, + kernel_size=3, + stride=1, + padding=1, + bias=False) + self.bn1 = nn.BatchNorm2d(32) + self.layers = self._make_layers(in_channels=32) + self.linear = nn.Linear(cfg['out_channels'][-1], num_classes) + + def _make_layers(self, in_channels): + layers = [] + cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks', 'kernel_size', + 'stride']] + b = 0 + blocks = sum(self.cfg['num_blocks']) + for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg): + strides = [stride] + [1] * (num_blocks - 1) + for stride in strides: + drop_rate = self.cfg['drop_connect_rate'] * b / blocks + layers.append( + Block(in_channels, + out_channels, + kernel_size, + stride, + expansion, + se_ratio=0.25, + drop_rate=drop_rate)) + in_channels = out_channels + return nn.Sequential(*layers) + + def forward(self, x): + out = swish(self.bn1(self.conv1(x))) + out = self.layers(out) + out = F.adaptive_avg_pool2d(out, 1) + out = out.view(out.size(0), -1) + 
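+        # Classifier-level dropout, distinct from the per-block drop_connect
+        # above; the self.training guard below makes it a no-op at eval time.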
dropout_rate = self.cfg['dropout_rate'] + if self.training and dropout_rate > 0: + out = F.dropout(out, p=dropout_rate) + out = self.linear(out) + return out + + +def EfficientNetB0(): + cfg = { + 'num_blocks': [1, 2, 2, 3, 3, 4, 1], + 'expansion': [1, 6, 6, 6, 6, 6, 6], + 'out_channels': [16, 24, 40, 80, 112, 192, 320], + 'kernel_size': [3, 3, 5, 3, 5, 5, 3], + 'stride': [1, 2, 2, 2, 1, 2, 1], + 'dropout_rate': 0.2, + 'drop_connect_rate': 0.2, + } + return EfficientNet(cfg) + + +def test(): + net = EfficientNetB0() + x = torch.randn(2, 3, 32, 32) + y = net(x) + print(y.shape) + + +if __name__ == '__main__': + test() diff --git a/models/Old/googlenet.py b/models/Old/googlenet.py new file mode 100644 index 0000000..de036d8 --- /dev/null +++ b/models/Old/googlenet.py @@ -0,0 +1,107 @@ +'''GoogLeNet with PyTorch.''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Inception(nn.Module): + def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes): + super(Inception, self).__init__() + # 1x1 conv branch + self.b1 = nn.Sequential( + nn.Conv2d(in_planes, n1x1, kernel_size=1), + nn.BatchNorm2d(n1x1), + nn.ReLU(True), + ) + + # 1x1 conv -> 3x3 conv branch + self.b2 = nn.Sequential( + nn.Conv2d(in_planes, n3x3red, kernel_size=1), + nn.BatchNorm2d(n3x3red), + nn.ReLU(True), + nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1), + nn.BatchNorm2d(n3x3), + nn.ReLU(True), + ) + + # 1x1 conv -> 5x5 conv branch + self.b3 = nn.Sequential( + nn.Conv2d(in_planes, n5x5red, kernel_size=1), + nn.BatchNorm2d(n5x5red), + nn.ReLU(True), + nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1), + nn.BatchNorm2d(n5x5), + nn.ReLU(True), + nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1), + nn.BatchNorm2d(n5x5), + nn.ReLU(True), + ) + + # 3x3 pool -> 1x1 conv branch + self.b4 = nn.Sequential( + nn.MaxPool2d(3, stride=1, padding=1), + nn.Conv2d(in_planes, pool_planes, kernel_size=1), + nn.BatchNorm2d(pool_planes), + nn.ReLU(True), + ) + + def forward(self, x): + y1 = self.b1(x) + y2 = self.b2(x) + y3 = self.b3(x) + y4 = self.b4(x) + return torch.cat([y1,y2,y3,y4], 1) + + +class GoogLeNet(nn.Module): + def __init__(self): + super(GoogLeNet, self).__init__() + self.pre_layers = nn.Sequential( + nn.Conv2d(3, 192, kernel_size=3, padding=1), + nn.BatchNorm2d(192), + nn.ReLU(True), + ) + + self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) + self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) + + self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) + + self.a4 = Inception(480, 192, 96, 208, 16, 48, 64) + self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) + self.c4 = Inception(512, 128, 128, 256, 24, 64, 64) + self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) + self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) + + self.a5 = Inception(832, 256, 160, 320, 32, 128, 128) + self.b5 = Inception(832, 384, 192, 384, 48, 128, 128) + + self.avgpool = nn.AvgPool2d(8, stride=1) + self.linear = nn.Linear(1024, 10) + + def forward(self, x): + out = self.pre_layers(x) + out = self.a3(out) + out = self.b3(out) + out = self.maxpool(out) + out = self.a4(out) + out = self.b4(out) + out = self.c4(out) + out = self.d4(out) + out = self.e4(out) + out = self.maxpool(out) + out = self.a5(out) + out = self.b5(out) + out = self.avgpool(out) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def test(): + net = GoogLeNet() + x = torch.randn(1,3,32,32) + y = net(x) + print(y.size()) + +# test() diff --git a/models/Old/lenet.py b/models/Old/lenet.py new file mode 100644 index 
0000000..d657b74 --- /dev/null +++ b/models/Old/lenet.py @@ -0,0 +1,23 @@ +'''LeNet in PyTorch.''' +import torch.nn as nn +import torch.nn.functional as F + +class LeNet(nn.Module): + def __init__(self): + super(LeNet, self).__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16*5*5, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + + def forward(self, x): + out = F.relu(self.conv1(x)) + out = F.max_pool2d(out, 2) + out = F.relu(self.conv2(out)) + out = F.max_pool2d(out, 2) + out = out.view(out.size(0), -1) + out = F.relu(self.fc1(out)) + out = F.relu(self.fc2(out)) + out = self.fc3(out) + return out diff --git a/models/Old/mobilenet.py b/models/Old/mobilenet.py new file mode 100644 index 0000000..497ef1e --- /dev/null +++ b/models/Old/mobilenet.py @@ -0,0 +1,61 @@ +'''MobileNet in PyTorch. + +See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" +for more details. +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Block(nn.Module): + '''Depthwise conv + Pointwise conv''' + def __init__(self, in_planes, out_planes, stride=1): + super(Block, self).__init__() + self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False) + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn2 = nn.BatchNorm2d(out_planes) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + return out + + +class MobileNet(nn.Module): + # (128,2) means conv planes=128, conv stride=2, by default conv stride=1 + cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024] + + def __init__(self, num_classes=10): + super(MobileNet, self).__init__() + self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(32) + self.layers = self._make_layers(in_planes=32) + self.linear = nn.Linear(1024, num_classes) + + def _make_layers(self, in_planes): + layers = [] + for x in self.cfg: + out_planes = x if isinstance(x, int) else x[0] + stride = 1 if isinstance(x, int) else x[1] + layers.append(Block(in_planes, out_planes, stride)) + in_planes = out_planes + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layers(out) + out = F.avg_pool2d(out, 2) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def test(): + net = MobileNet() + x = torch.randn(1,3,32,32) + y = net(x) + print(y.size()) + +# test() diff --git a/models/Old/mobilenetv2.py b/models/Old/mobilenetv2.py new file mode 100644 index 0000000..17e5823 --- /dev/null +++ b/models/Old/mobilenetv2.py @@ -0,0 +1,86 @@ +'''MobileNetV2 in PyTorch. + +See the paper "Inverted Residuals and Linear Bottlenecks: +Mobile Networks for Classification, Detection and Segmentation" for more details. 
+''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Block(nn.Module): + '''expand + depthwise + pointwise''' + def __init__(self, in_planes, out_planes, expansion, stride): + super(Block, self).__init__() + self.stride = stride + + planes = expansion * in_planes + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn3 = nn.BatchNorm2d(out_planes) + + self.shortcut = nn.Sequential() + if stride == 1 and in_planes != out_planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(out_planes), + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + out = out + self.shortcut(x) if self.stride==1 else out + return out + + +class MobileNetV2(nn.Module): + # (expansion, out_planes, num_blocks, stride) + cfg = [(1, 16, 1, 1), + (6, 24, 2, 1), # NOTE: change stride 2 -> 1 for CIFAR10 + (6, 32, 3, 2), + (6, 64, 4, 2), + (6, 96, 3, 1), + (6, 160, 3, 2), + (6, 320, 1, 1)] + + def __init__(self, num_classes=10): + super(MobileNetV2, self).__init__() + # NOTE: change conv1 stride 2 -> 1 for CIFAR10 + self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(32) + self.layers = self._make_layers(in_planes=32) + self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False) + self.bn2 = nn.BatchNorm2d(1280) + self.linear = nn.Linear(1280, num_classes) + + def _make_layers(self, in_planes): + layers = [] + for expansion, out_planes, num_blocks, stride in self.cfg: + strides = [stride] + [1]*(num_blocks-1) + for stride in strides: + layers.append(Block(in_planes, out_planes, expansion, stride)) + in_planes = out_planes + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layers(out) + out = F.relu(self.bn2(self.conv2(out))) + # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10 + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def test(): + net = MobileNetV2() + x = torch.randn(2,3,32,32) + y = net(x) + print(y.size()) + +# test() diff --git a/models/Old/mylenet.py b/models/Old/mylenet.py new file mode 100644 index 0000000..343fd69 --- /dev/null +++ b/models/Old/mylenet.py @@ -0,0 +1,71 @@ +'''LeNet in PyTorch.''' +import torch +import torch.nn as nn +import torch.nn.functional as F + +class MyLeNet(nn.Module): + def __init__(self): + super(MyLeNet, self).__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16*5*5, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + + def savg_pool2d(self,x,size): + b,c,h,w = x.shape + selh = torch.LongTensor(h/size,w/size).random_(0, size) + rngh = torch.arange(0,h,size).long().view(h/size,1).repeat(1,w/size).view(h/size,w/size) + selx = (selh+rngh).repeat(b,c,1,1) + + selw = torch.LongTensor(h/size,w/size).random_(0, size) + rngw = torch.arange(0,w,size).long().view(1,h/size).repeat(h/size,1).view(h/size,w/size) + sely = (selw+rngw).repeat(b,c,1,1) + bv, cv ,hv, wv = torch.meshgrid([torch.arange(0,b), 
torch.arange(0,c),torch.arange(0,h/size),torch.arange(0,w/size)]) + #x=x.view(b,c,h*w) + newx = x[bv,cv, selx, sely] + #ghdh + return newx + + def ssoftmax_pool2d(self,x,size,idx): + b,c,h,w = x.shape + w = wdataset[idx] + selh = torch.LongTensor(h/size,w/size).random_(0, size) + rngh = torch.arange(0,h,size).long().view(h/size,1).repeat(1,w/size).view(h/size,w/size) + selx = (selh+rngh).repeat(b,c,1,1) + + selw = torch.LongTensor(h/size,w/size).random_(0, size) + rngw = torch.arange(0,w,size).long().view(1,h/size).repeat(h/size,1).view(h/size,w/size) + sely = (selw+rngw).repeat(b,c,1,1) + bv, cv ,hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c),torch.arange(0,h/size),torch.arange(0,w/size)]) + #x=x.view(b,c,h*w) + newx = x[bv,cv, selx, sely] + #ghdh + return newx + + def mavg_pool2d(self,x,size): + b,c,h,w = x.shape + #newx=(x[:,:,0::2,0::2]+x[:,:,1::2,0::2]+x[:,:,0::2,1::2]+x[:,:,1::2,1::2])/4 + newx=(x[:,:,0::2,0::2]) + return newx + + + def forward(self, x, stoch=True): + if self.training==False: + stoch=False + out = F.relu(self.conv1(x)) + if stoch: + out = self.savg_pool2d(out, 2) + else: + out = F.avg_pool2d(out, 2) + out = F.relu(self.conv2(out)) + if stoch: + out = self.savg_pool2d(out, 2) + else: + out = F.avg_pool2d(out, 2) + out = out.view(out.size(0), -1) + out = F.relu(self.fc1(out)) + out = F.relu(self.fc2(out)) + out = self.fc3(out) + return out + diff --git a/models/Old/mylenet2.py b/models/Old/mylenet2.py new file mode 100644 index 0000000..47aec65 --- /dev/null +++ b/models/Old/mylenet2.py @@ -0,0 +1,123 @@ +'''LeNet in PyTorch.''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class MyLeNet2(nn.Module): + def __init__(self): + super(MyLeNet2, self).__init__() + self.conv1 = nn.Conv2d(3, 60, 5) + self.conv2 = nn.Conv2d(60, 160, 5) + self.fc1 = nn.Linear(160*5*5, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + +# Vanilla Convolution + def myconv2d(self, input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1): + batch_size, in_channels, in_h, in_w = input.shape + out_channels, in_channels, kh, kw = weight.shape + out_h = in_h-2*(int(kh)/2) + out_w = in_w-2*(int(kw)/2) + + unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=dilation, padding=padding, stride=stride) + inp_unf = unfold(input)#.view(batch_size,in_channels*kh*kw,out_h,out_w) + + + if bias is None: + out_unf = inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()).transpose(1, 2) + else: + out_unf = (inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()) + bias).transpose(1, 2) + + out = out_unf.view(batch_size, out_channels, out_h, out_w) + return out + + def myconv2d_avg(self, input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1,size=2): + batch_size, in_channels, in_h, in_w = input.shape + out_channels, in_channels, kh, kw = weight.shape + out_h = in_h-2*(int(kh)/2) + out_w = in_w-2*(int(kw)/2) + + unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=dilation, padding=padding, stride=stride) + inp_unf = unfold(input).view(batch_size,in_channels*kh*kw,out_h,out_w) + sel_h = torch.LongTensor(out_h/size,out_w/size).random_(0, size)#.cuda() + rng_h = sel_h + torch.arange(0,out_h,size).long()#.cuda() + + sel_w = torch.LongTensor(out_h/size,out_w/size).random_(0, size)#.cuda() + rng_w = sel_w+torch.arange(0,out_w,size).long()#.cuda() + inp_unf = inp_unf[:,:,rng_h,rng_w].view(batch_size,in_channels*kh*kw,out_h/size*out_w/size) + #unfold_avg = torch.nn.Unfold(kernel_size=(1, 1), dilation=1, 
padding=0, stride=2) + + if bias is None: + out_unf = inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()).transpose(1, 2) + else: + out_unf = (inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()) + bias).transpose(1, 2) + + out = out_unf.view(batch_size, out_channels, out_h/size, out_w/size).contiguous() + return out + + + def savg_pool2d(self,x,size): + b,c,h,w = x.shape + selh = torch.LongTensor(h/size,w/size).random_(0, size) + rngh = torch.arange(0,h,size).long().view(h/size,1).repeat(1,w/size).view(h/size,w/size) + selx = (selh+rngh).repeat(b,c,1,1) + + selw = torch.LongTensor(h/size,w/size).random_(0, size) + rngw = torch.arange(0,w,size).long().view(1,h/size).repeat(h/size,1).view(h/size,w/size) + sely = (selw+rngw).repeat(b,c,1,1) + bv, cv ,hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c),torch.arange(0,h/size),torch.arange(0,w/size)]) + #x=x.view(b,c,h*w) + newx = x[bv,cv, selx, sely] + #ghdh + return newx + + def ssoftmax_pool2d(self,x,size,idx): + b,c,h,w = x.shape + w = wdataset[idx] + selh = torch.LongTensor(h/size,w/size).random_(0, size) + rngh = torch.arange(0,h,size).long().view(h/size,1).repeat(1,w/size).view(h/size,w/size) + selx = (selh+rngh).repeat(b,c,1,1) + + selw = torch.LongTensor(h/size,w/size).random_(0, size) + rngw = torch.arange(0,w,size).long().view(1,h/size).repeat(h/size,1).view(h/size,w/size) + sely = (selw+rngw).repeat(b,c,1,1) + bv, cv ,hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c),torch.arange(0,h/size),torch.arange(0,w/size)]) + #x=x.view(b,c,h*w) + newx = x[bv,cv, selx, sely] + #ghdh + return newx + + def mavg_pool2d(self,x,size): + b,c,h,w = x.shape + #newx=(x[:,:,0::2,0::2]+x[:,:,1::2,0::2]+x[:,:,0::2,1::2]+x[:,:,1::2,1::2])/4 + newx=(x[:,:,0::2,0::2]) + return newx + + + def forward(self, x, stoch=True): + if self.training==False: + stoch=False + #out = F.relu(self.conv1(x)) + out = F.relu(self.myconv2d(x, self.conv1.weight, bias=self.conv1.bias)) + if stoch: + out = self.savg_pool2d(out, 2) + else: + out = F.avg_pool2d(out, 2) + #out = F.relu(self.conv2(out)) + if 0: + out = F.relu(self.myconv2d_avg(out, self.conv2.weight, bias=self.conv2.bias,size=2)) + else: + #out = F.relu(self.conv2(out)) + out = F.relu(self.myconv2d(out, self.conv2.weight, bias=self.conv2.bias)) + out = F.avg_pool2d(out, 2) + #if stoch: + # out = self.savg_pool2d(out, 2) + #else: + # out = F.avg_pool2d(out, 2) + out = out.view(out.size(0), -1 ) + out = F.relu(self.fc1(out)) + out = F.relu(self.fc2(out)) + out = self.fc3(out) + return out + diff --git a/models/Old/mylenet3.py b/models/Old/mylenet3.py new file mode 100644 index 0000000..dbf96be --- /dev/null +++ b/models/Old/mylenet3.py @@ -0,0 +1,238 @@ +'''LeNet in PyTorch.''' +import torch +import torch.nn as nn +import torch.nn.functional as F + +import math + +from .sconv2davg import SConv2dAvg + +class MyLeNetNormal(nn.Module):#epoch 12s + def __init__(self): + super(MyLeNetNormal, self).__init__() + self.conv1 = nn.Conv2d(3, 200, 5, stride=1) + self.conv2 = nn.Conv2d(200, 400, 3, stride=1) + self.conv3 = nn.Conv2d(400, 800, 3, stride=1) + self.fc1 = nn.Linear(800, 10) + + def forward(self, x, stoch=True): + _,_,h0,w0 = x.shape + out = F.relu(self.conv1(x)) + _,_,h1,w1 = out.shape + out = F.avg_pool2d(out,2,ceil_mode=True) + out = F.relu(self.conv2(out)) + _,_,h2,w2 = out.shape + out = F.avg_pool2d(out,2,ceil_mode=True) + out = F.relu(self.conv3(out)) + out = F.avg_pool2d(out,4,ceil_mode=True) + + out = out.view(out.size(0), -1 ) + out = (self.fc1(out)) + + 
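+        # Shape bookkeeping for a 32x32 CIFAR input: conv1 5x5 -> 28x28,
+        # pool/2 -> 14x14, conv2 3x3 -> 12x12, pool/2 -> 6x6, conv3 3x3 ->
+        # 4x4, pool/4 (ceil) -> 1x1, so the flattened vector is exactly 800,
+        # matching fc1's in_features.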
return out + +def savg_pool2d(x,size,ceil_mode=False): + b,c,h,w = x.shape + device = x.device + if ceil_mode: + out_h = math.ceil(h/size) + out_w = math.ceil(w/size) + else: + out_h = math.floor(h/size) + out_w = math.floor(w/size) + selh = torch.randint(size,(out_h,out_w), device=device) + #selh[:] = 0 + rngh = torch.arange(0,h,size,device=x.device).view(-1,1) + selh = selh+rngh + + selw = torch.randint(size,(out_h,out_w), device=device) + #selw[:] = 0 + rngw = torch.arange(0,w,size,device=x.device) + selw = selw+rngw + + newx = x[:,:, selh, selw] + return newx + +def savg_pool2d_(x,size,ceil_mode=False): + b,c,h,w = x.shape + device = x.device + selh = torch.randint(size,(math.floor(h/size),math.floor(w/size)), device=device) + rngh = torch.arange(0,h,size, device=device).long().view(h/size,1).repeat(1,w/size).view(math.floor(h/size),math.floor(w/size)) + selx = (selh+rngh).repeat(b,c,1,1) + + selw = torch.randint(size,(math.floor(h/size),math.floor(w/size)), device=device) + rngw = torch.arange(0,w,size, device=device).long().view(1,h/size).repeat(h/size,1).view(math.floor(h/size),math.floor(w/size)) + sely = (selw+rngw).repeat(b,c,1,1) + bv, cv ,hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c),torch.arange(0,h/size),torch.arange(0,w/size)]) + #x=x.view(b,c,h*w) + newx = x[bv,cv, selx, sely] + #ghdh + return newx + +class MyLeNetSimNormal(nn.Module):#epoch 12s + def __init__(self): + super(MyLeNetSimNormal, self).__init__() + self.conv1 = nn.Conv2d(3, 200, 5, stride=1) + self.conv2 = nn.Conv2d(200, 400, 3, stride=1) + self.conv3 = nn.Conv2d(400, 800, 3, stride=1) + self.fc1 = nn.Linear(800, 10) + + def forward(self, x, stoch=True): + + #stoch=True + out = F.relu(self.conv1(x)) + if stoch: + out = savg_pool2d(out,2,ceil_mode=True) + else: + out = F.avg_pool2d(out,2,ceil_mode=True) + out = F.relu(self.conv2(out)) + if stoch: + out = savg_pool2d(out,2,ceil_mode=True) + else: + out = F.avg_pool2d(out,2,ceil_mode=True) + out = F.relu(self.conv3(out)) + if stoch: + out = savg_pool2d(out,4,ceil_mode=True) + else: + out = F.avg_pool2d(out,4,ceil_mode=True) + + out = out.view(out.size(0), -1 ) + out = (self.fc1(out)) + return out + + +class MyLeNetStride(nn.Module):#epoch 6s + def __init__(self): + super(MyLeNetStride, self).__init__() + self.conv1 = nn.Conv2d(3, 200, 5, stride=2) + self.conv2 = nn.Conv2d(200, 400, 3, stride=2) + self.conv3 = nn.Conv2d(400, 800, 3, stride=4) + self.fc1 = nn.Linear(800, 10) + + def forward(self, x, stoch=True): + + out = F.relu(self.conv1(x)) + out = F.relu(self.conv2(out)) + out = F.relu(self.conv3(out)) + + out = out.view(out.size(0), -1 ) + out = (self.fc1(out)) + return out + +class MyLeNetMatNormal(nn.Module):#epach 21s + def __init__(self): + super(MyLeNetMatNormal, self).__init__() + self.conv1 = SConv2dAvg(3, 200, 5, stride=1) + self.conv2 = SConv2dAvg(200, 400, 3, stride=1) + self.conv3 = SConv2dAvg(400, 800, 3, stride=1) + self.fc1 = nn.Linear(800, 10) + + def forward(self, x, stoch=True): + _,_,h0,w0 = x.shape + out = F.relu(self.conv1(x)) + out = F.avg_pool2d(out,2,ceil_mode=True) + + _,_,h1,w1 = out.shape + out = F.relu(self.conv2(out)) + out = F.avg_pool2d(out,2,ceil_mode=True) + + _,_,h2,w2 = out.shape + out = F.relu(self.conv3(out)) + out = F.avg_pool2d(out,4,ceil_mode=True) + + out = out.view(out.size(0), -1 ) + out = (self.fc1(out)) + + if 1: + comp = 0 + comp+=self.conv1.comp(h0,w0) + comp+=self.conv2.comp(h1,w1) + comp+=self.conv3.comp(h2,w2) + self.comp = comp/1000000 + return out + + +class MyLeNetMatStoch(nn.Module):#epoch 17s 
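+    # Top-down stochastic variant (fig.2 of the paper): each SConv2dAvg
+    # samples its output locations independently through its stride, with no
+    # mask threaded between layers; contrast with MyLeNetMatStochBU below,
+    # which samples bottom-up starting from the last layer's mask.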
+ def __init__(self): + super(MyLeNetMatStoch, self).__init__() + self.conv1 = SConv2dAvg(3, 200, 5, stride=2,ceil_mode=True) + self.conv2 = SConv2dAvg(200, 400, 3, stride=2,ceil_mode=True) + self.conv3 = SConv2dAvg(400, 800, 3, stride=4,ceil_mode=True) + self.fc1 = nn.Linear(800, 10) + + def forward(self, x, stoch=True): + # if stoch: + _,_,h0,w0=x.shape + out = F.relu(self.conv1(x,stoch=stoch)) + _,_,h1,w1=out.shape + out = F.relu(self.conv2(out,stoch=stoch)) + _,_,h2,w2=out.shape + out = F.relu(self.conv3(out,stoch=stoch)) + # else: + # out = F.relu(self.conv1(x,stoch=True,stride=1)) + # out = F.avg_pool2d(out,2,ceil_mode=True) + # out = F.relu(self.conv2(out,stoch=True,stride=1)) + # out = F.avg_pool2d(out,2,ceil_mode=True) + # out = F.relu(self.conv3(out,stoch=True,stride=1)) + # out = F.avg_pool2d(out,4,ceil_mode=True) + + out = out.view(out.size(0), -1 ) + out = self.fc1(out) + #Estimate computation + if 1: + comp = 0 + comp+=self.conv1.comp(h0,w0) + comp+=self.conv2.comp(h1,w1) + comp+=self.conv3.comp(h2,w2) + self.comp = comp/1000000 + return out + +class MyLeNetMatStochBU(nn.Module):#epoch 11s + def __init__(self): + super(MyLeNetMatStochBU, self).__init__() + self.conv1 = SConv2dAvg(3, 200, 5, stride=2,ceil_mode=True) + self.conv2 = SConv2dAvg(200, 400, 3, stride=2,ceil_mode=True) + self.conv3 = SConv2dAvg(400, 800, 3, stride=4,ceil_mode=True) + self.fc1 = nn.Linear(800, 10) + + def forward(self, x, stoch=True): + #get sizes + h0,w0 = x.shape[2],x.shape[3] + h1,w1 = self.conv1.get_size(h0,w0) + h2,w2 = self.conv2.get_size(h1,w1) + h3,w3 = self.conv3.get_size(h2,w2) + # print('Shapes :') + # print('0', h0, w0) + # print('1', h1, w1) + # print('2', h2, w2) + # print('3', h3, w3) + #sample BU + # mask3 = torch.ones(h3,w3).cuda() + mask3 = torch.ones((h3,w3), device=x.device) + selh3,selw3,mask2 = self.conv3.sample(h2,w2,mask=mask3) + selh2,selw2,mask1 = self.conv2.sample(h1,w1,mask=mask2) + selh1,selw1,mask0 = self.conv1.sample(h0,w0,mask=mask1) + #forward + if stoch: + out = F.relu(self.conv1(x,selh1,selw1,mask1,stoch=stoch)) + out = F.relu(self.conv2(out,selh2,selw2,mask2,stoch=stoch)) + out = F.relu(self.conv3(out,selh3,selw3,mask3,stoch=stoch)) + else: + out = F.relu(self.conv1(x,stoch=True,stride=1)) + out = F.avg_pool2d(out,2,ceil_mode=True) + out = F.relu(self.conv2(out,stoch=True,stride=1)) + out = F.avg_pool2d(out,2,ceil_mode=True) + out = F.relu(self.conv3(out,stoch=True,stride=1)) + out = F.avg_pool2d(out,4,ceil_mode=True) + + out = out.view(out.size(0), -1 ) + out = (self.fc1(out)) + #Estimate computation + if 1: + comp = 0 + comp+=self.conv1.comp(h0,w0,mask1) + comp+=self.conv2.comp(h1,w1,mask2) + comp+=self.conv3.comp(h2,w2,mask3) + self.comp = comp.item()/1000000 + return out + diff --git a/models/Old/myresnet.py b/models/Old/myresnet.py new file mode 100644 index 0000000..7d4d504 --- /dev/null +++ b/models/Old/myresnet.py @@ -0,0 +1,159 @@ +'''ResNet in PyTorch. + +For Pre-activation ResNet, see 'preact_resnet.py'. + +Reference: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. 
arXiv:1512.03385
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class BasicBlock(nn.Module):
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(BasicBlock, self).__init__()
+        self.conv1 = nn.Conv2d(
+            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+
+        self.shortcut = nn.Sequential()
+        if stride != 1 or in_planes != self.expansion*planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, self.expansion*planes,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(self.expansion*planes)
+            )
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+        out += self.shortcut(x)
+        out = F.relu(out)
+        return out
+
+
+class Bottleneck(nn.Module):
+    expansion = 4
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(Bottleneck, self).__init__()
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.conv3 = nn.Conv2d(planes, self.expansion *
+                               planes, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(self.expansion*planes)
+
+        self.shortcut = nn.Sequential()
+        if stride != 1 or in_planes != self.expansion*planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, self.expansion*planes,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(self.expansion*planes)
+            )
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = F.relu(self.bn2(self.conv2(out)))
+        out = self.bn3(self.conv3(out))
+        out += self.shortcut(x)
+        out = F.relu(out)
+        return out
+
+
+class ResNet(nn.Module):
+    def __init__(self, block, num_blocks, num_classes=10,stoch=False):
+        super(ResNet, self).__init__()
+        self.in_planes = 64
+
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
+        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
+        self.linear = nn.Linear(512*block.expansion, num_classes)
+        self.stoch = stoch
+
+    def _make_layer(self, block, planes, num_blocks, stride):
+        strides = [stride] + [1]*(num_blocks-1)
+        layers = []
+        for stride in strides:
+            layers.append(block(self.in_planes, planes, stride))
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*layers)
+
+    def savg_pool2d(self,x,size,locx=-1,locy=-1):
+        b,c,h,w = x.shape
+        if locx==-1:
+            selh = torch.LongTensor(h//size,w//size).random_(0, size)
+        else:
+            selh = torch.ones(h//size,w//size).long()*locx
+        rngh = torch.arange(0,h,size).long().view(h//size,1).repeat(1,w//size).view(h//size,w//size)
+        selx = (selh+rngh).repeat(b,c,1,1)
+        if locy==-1:
+            selw = torch.LongTensor(h//size,w//size).random_(0, size)
+        else:
+            selw = torch.ones(h//size,w//size).long()*locy
+        rngw = torch.arange(0,w,size).long().view(1,w//size).repeat(h//size,1).view(h//size,w//size)
+        sely = (selw+rngw).repeat(b,c,1,1)
+        bv, cv ,hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c),torch.arange(0,h//size),torch.arange(0,w//size)])
+        #x=x.view(b,c,h*w)
+        newx = x[bv,cv, selx, sely]
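+        # Advanced-indexing gather: for every (batch, channel) position one
+        # random element per size x size window survives, i.e. a stochastic
+        # counterpart of F.avg_pool2d(x, size).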
return newx + + def forward(self, x ,stoch = True): + #if self.training==False: + # stoch=False + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + if self.stoch: + if stoch: + out = self.savg_pool2d(out, 4) + else: + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def MyResNet18(stoch=False): + return ResNet(BasicBlock, [2, 2, 2, 2],stoch=stoch) + + +def ResNet34(): + return ResNet(BasicBlock, [3, 4, 6, 3]) + + +def MyResNet50(): + return ResNet(Bottleneck, [3, 4, 6, 3]) + + +def ResNet101(): + return ResNet(Bottleneck, [3, 4, 23, 3]) + + +def ResNet152(): + return ResNet(Bottleneck, [3, 8, 36, 3]) + + +def test(): + net = ResNet18() + y = net(torch.randn(1, 3, 32, 32)) + print(y.size()) + +# test() diff --git a/models/Old/myresnet2.py b/models/Old/myresnet2.py new file mode 100644 index 0000000..bf7159f --- /dev/null +++ b/models/Old/myresnet2.py @@ -0,0 +1,187 @@ +'''ResNet in PyTorch. + +For Pre-activation ResNet, see 'preact_resnet.py'. + +Reference: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385 +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, stride=1): + super(BasicBlock, self).__init__() + self.conv1 = nn.Conv2d( + in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, + stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion*planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.bn2(self.conv2(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_planes, planes, stride=1): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, + stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, self.expansion * + planes, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(self.expansion*planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion*planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class ResNet(nn.Module): + def __init__(self, block, num_blocks, num_classes=10,stoch=False): + super(ResNet, self).__init__() + self.in_planes = 64 + + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, + stride=1, padding=1, bias=False) + self.conv2 = nn.Conv2d(512, 512, kernel_size=3, + stride=1, padding=1, bias=True) + self.bn1 = nn.BatchNorm2d(64) + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) + self.layer2 = 
self._make_layer(block, 128, num_blocks[1], stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) + self.linear = nn.Linear(512*block.expansion, num_classes) + self.stoch = stoch + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def myconv2d_avg(self, input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1,size=2): + batch_size, in_channels, in_h, in_w = input.shape + out_channels, in_channels, kh, kw = weight.shape + out_h = (in_h+2*padding)-2*(int(kh)/2) + out_w = (in_w+2*padding)-2*(int(kw)/2) + + unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=dilation, padding=padding, stride=stride) + inp_unf = unfold(input).view(batch_size,in_channels*kh*kw,out_h,out_w) + sel_h = torch.LongTensor(out_h/size,out_w/size).random_(0, size)#.cuda() + rng_h = sel_h + torch.arange(0,out_h,size).long()#.cuda() + + sel_w = torch.LongTensor(out_h/size,out_w/size).random_(0, size)#.cuda() + rng_w = sel_w+torch.arange(0,out_w,size).long()#.cuda() + inp_unf = inp_unf[:,:,rng_h,rng_w].view(batch_size,in_channels*kh*kw,out_h/size*out_w/size) + #unfold_avg = torch.nn.Unfold(kernel_size=(1, 1), dilation=1, padding=0, stride=2) + + if bias is None: + out_unf = inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()).transpose(1, 2) + else: + out_unf = (inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()) + bias).transpose(1, 2) + + out = out_unf.view(batch_size, out_channels, out_h/size, out_w/size).contiguous() + return out + + + def savg_pool2d(self,x,size,locx=-1,locy=-1): + b,c,h,w = x.shape + if locx==-1: + selh = torch.LongTensor(h/size,w/size).random_(0, size) + else: + selh = torch.ones(h/size,w/size).long()*loc + rngh = torch.arange(0,h,size).long().view(h/size,1).repeat(1,w/size).view(h/size,w/size) + selx = (selh+rngh).repeat(b,c,1,1) + if locy==-1: + selw = torch.LongTensor(h/size,w/size).random_(0, size) + else: + selw = torch.ones(h/size,w/size).long()*loc + rngw = torch.arange(0,w,size).long().view(1,h/size).repeat(h/size,1).view(h/size,w/size) + sely = (selw+rngw).repeat(b,c,1,1) + bv, cv ,hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c),torch.arange(0,h/size),torch.arange(0,w/size)]) + #x=x.view(b,c,h*w) + newx = x[bv,cv, selx, sely] + #ghdh + return newx + + def forward(self, x ,stoch = True): + #if self.training==False: + # stoch=False + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + if self.stoch and stoch: + out = F.relu(self.myconv2d_avg(out, self.conv2.weight, bias=self.conv2.bias,padding=1,size=4)) + #out = F.avg_pool2d(out, 2) + else: + out = F.relu(self.myconv2d_avg(out, self.conv2.weight, bias=self.conv2.bias,padding=1,size=1)) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def MyResNet18(stoch=False): + return ResNet(BasicBlock, [2, 2, 2, 2],stoch=stoch) + + +def ResNet34(): + return ResNet(BasicBlock, [3, 4, 6, 3]) + + +def MyResNet50(): + return ResNet(Bottleneck, [3, 4, 6, 3]) + + +def ResNet101(): + return ResNet(Bottleneck, [3, 4, 23, 3]) + + +def ResNet152(): + return ResNet(Bottleneck, [3, 8, 36, 3]) + + +def test(): + net = ResNet18() + y = 
net(torch.randn(1, 3, 32, 32)) + print(y.size()) + +# test() diff --git a/models/Old/pnasnet.py b/models/Old/pnasnet.py new file mode 100644 index 0000000..de8c4d5 --- /dev/null +++ b/models/Old/pnasnet.py @@ -0,0 +1,125 @@ +'''PNASNet in PyTorch. + +Paper: Progressive Neural Architecture Search +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class SepConv(nn.Module): + '''Separable Convolution.''' + def __init__(self, in_planes, out_planes, kernel_size, stride): + super(SepConv, self).__init__() + self.conv1 = nn.Conv2d(in_planes, out_planes, + kernel_size, stride, + padding=(kernel_size-1)//2, + bias=False, groups=in_planes) + self.bn1 = nn.BatchNorm2d(out_planes) + + def forward(self, x): + return self.bn1(self.conv1(x)) + + +class CellA(nn.Module): + def __init__(self, in_planes, out_planes, stride=1): + super(CellA, self).__init__() + self.stride = stride + self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) + if stride==2: + self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn1 = nn.BatchNorm2d(out_planes) + + def forward(self, x): + y1 = self.sep_conv1(x) + y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) + if self.stride==2: + y2 = self.bn1(self.conv1(y2)) + return F.relu(y1+y2) + +class CellB(nn.Module): + def __init__(self, in_planes, out_planes, stride=1): + super(CellB, self).__init__() + self.stride = stride + # Left branch + self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) + self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride) + # Right branch + self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride) + if stride==2: + self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn1 = nn.BatchNorm2d(out_planes) + # Reduce channels + self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn2 = nn.BatchNorm2d(out_planes) + + def forward(self, x): + # Left branch + y1 = self.sep_conv1(x) + y2 = self.sep_conv2(x) + # Right branch + y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) + if self.stride==2: + y3 = self.bn1(self.conv1(y3)) + y4 = self.sep_conv3(x) + # Concat & reduce channels + b1 = F.relu(y1+y2) + b2 = F.relu(y3+y4) + y = torch.cat([b1,b2], 1) + return F.relu(self.bn2(self.conv2(y))) + +class PNASNet(nn.Module): + def __init__(self, cell_type, num_cells, num_planes): + super(PNASNet, self).__init__() + self.in_planes = num_planes + self.cell_type = cell_type + + self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(num_planes) + + self.layer1 = self._make_layer(num_planes, num_cells=6) + self.layer2 = self._downsample(num_planes*2) + self.layer3 = self._make_layer(num_planes*2, num_cells=6) + self.layer4 = self._downsample(num_planes*4) + self.layer5 = self._make_layer(num_planes*4, num_cells=6) + + self.linear = nn.Linear(num_planes*4, 10) + + def _make_layer(self, planes, num_cells): + layers = [] + for _ in range(num_cells): + layers.append(self.cell_type(self.in_planes, planes, stride=1)) + self.in_planes = planes + return nn.Sequential(*layers) + + def _downsample(self, planes): + layer = self.cell_type(self.in_planes, planes, stride=2) + self.in_planes = planes + return layer + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = 
self.layer3(out) + out = self.layer4(out) + out = self.layer5(out) + out = F.avg_pool2d(out, 8) + out = self.linear(out.view(out.size(0), -1)) + return out + + +def PNASNetA(): + return PNASNet(CellA, num_cells=6, num_planes=44) + +def PNASNetB(): + return PNASNet(CellB, num_cells=6, num_planes=32) + + +def test(): + net = PNASNetB() + x = torch.randn(1,3,32,32) + y = net(x) + print(y) + +# test() diff --git a/models/Old/preact_resnet.py b/models/Old/preact_resnet.py new file mode 100644 index 0000000..abb1bc3 --- /dev/null +++ b/models/Old/preact_resnet.py @@ -0,0 +1,118 @@ +'''Pre-activation ResNet in PyTorch. + +Reference: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Identity Mappings in Deep Residual Networks. arXiv:1603.05027 +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class PreActBlock(nn.Module): + '''Pre-activation version of the BasicBlock.''' + expansion = 1 + + def __init__(self, in_planes, planes, stride=1): + super(PreActBlock, self).__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) + ) + + def forward(self, x): + out = F.relu(self.bn1(x)) + shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x + out = self.conv1(out) + out = self.conv2(F.relu(self.bn2(out))) + out += shortcut + return out + + +class PreActBottleneck(nn.Module): + '''Pre-activation version of the original Bottleneck module.''' + expansion = 4 + + def __init__(self, in_planes, planes, stride=1): + super(PreActBottleneck, self).__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) + + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) + ) + + def forward(self, x): + out = F.relu(self.bn1(x)) + shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x + out = self.conv1(out) + out = self.conv2(F.relu(self.bn2(out))) + out = self.conv3(F.relu(self.bn3(out))) + out += shortcut + return out + + +class PreActResNet(nn.Module): + def __init__(self, block, num_blocks, num_classes=10): + super(PreActResNet, self).__init__() + self.in_planes = 64 + + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) + self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) + self.linear = nn.Linear(512*block.expansion, num_classes) + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + 
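+    # Note on the construction above: _make_layer gives only the first block of
+    # a stage the requested stride, e.g. _make_layer(block, 128, 2, stride=2)
+    # builds strides [2, 1]. For a 32x32 CIFAR-10 input the four stages thus
+    # produce 32x32, 16x16, 8x8 and 4x4 maps, which matches the
+    # F.avg_pool2d(out, 4) in forward() below.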
+ def forward(self, x): + out = self.conv1(x) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def PreActResNet18(): + return PreActResNet(PreActBlock, [2,2,2,2]) + +def PreActResNet34(): + return PreActResNet(PreActBlock, [3,4,6,3]) + +def PreActResNet50(): + return PreActResNet(PreActBottleneck, [3,4,6,3]) + +def PreActResNet101(): + return PreActResNet(PreActBottleneck, [3,4,23,3]) + +def PreActResNet152(): + return PreActResNet(PreActBottleneck, [3,8,36,3]) + + +def test(): + net = PreActResNet18() + y = net((torch.randn(1,3,32,32))) + print(y.size()) + +# test() diff --git a/models/Old/regnet.py b/models/Old/regnet.py new file mode 100644 index 0000000..5d59c1a --- /dev/null +++ b/models/Old/regnet.py @@ -0,0 +1,155 @@ +'''RegNet in PyTorch. + +Paper: "Designing Network Design Spaces". + +Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class SE(nn.Module): + '''Squeeze-and-Excitation block.''' + + def __init__(self, in_planes, se_planes): + super(SE, self).__init__() + self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True) + self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True) + + def forward(self, x): + out = F.adaptive_avg_pool2d(x, (1, 1)) + out = F.relu(self.se1(out)) + out = self.se2(out).sigmoid() + out = x * out + return out + + +class Block(nn.Module): + def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio): + super(Block, self).__init__() + # 1x1 + w_b = int(round(w_out * bottleneck_ratio)) + self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(w_b) + # 3x3 + num_groups = w_b // group_width + self.conv2 = nn.Conv2d(w_b, w_b, kernel_size=3, + stride=stride, padding=1, groups=num_groups, bias=False) + self.bn2 = nn.BatchNorm2d(w_b) + # se + self.with_se = se_ratio > 0 + if self.with_se: + w_se = int(round(w_in * se_ratio)) + self.se = SE(w_b, w_se) + # 1x1 + self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(w_out) + + self.shortcut = nn.Sequential() + if stride != 1 or w_in != w_out: + self.shortcut = nn.Sequential( + nn.Conv2d(w_in, w_out, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(w_out) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + if self.with_se: + out = self.se(out) + out = self.bn3(self.conv3(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class RegNet(nn.Module): + def __init__(self, cfg, num_classes=10): + super(RegNet, self).__init__() + self.cfg = cfg + self.in_planes = 64 + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, + stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.layer1 = self._make_layer(0) + self.layer2 = self._make_layer(1) + self.layer3 = self._make_layer(2) + self.layer4 = self._make_layer(3) + self.linear = nn.Linear(self.cfg['widths'][-1], num_classes) + + def _make_layer(self, idx): + depth = self.cfg['depths'][idx] + width = self.cfg['widths'][idx] + stride = self.cfg['strides'][idx] + group_width = self.cfg['group_width'] + bottleneck_ratio = self.cfg['bottleneck_ratio'] + se_ratio = self.cfg['se_ratio'] + + layers = [] + for i in range(depth): + s = stride if i == 0 else 1 + 
layers.append(Block(self.in_planes, width, + s, group_width, bottleneck_ratio, se_ratio)) + self.in_planes = width + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = F.adaptive_avg_pool2d(out, (1, 1)) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def RegNetX_200MF(): + cfg = { + 'depths': [1, 1, 4, 7], + 'widths': [24, 56, 152, 368], + 'strides': [1, 1, 2, 2], + 'group_width': 8, + 'bottleneck_ratio': 1, + 'se_ratio': 0, + } + return RegNet(cfg) + + +def RegNetX_400MF(): + cfg = { + 'depths': [1, 2, 7, 12], + 'widths': [32, 64, 160, 384], + 'strides': [1, 1, 2, 2], + 'group_width': 16, + 'bottleneck_ratio': 1, + 'se_ratio': 0, + } + return RegNet(cfg) + + +def RegNetY_400MF(): + cfg = { + 'depths': [1, 2, 7, 12], + 'widths': [32, 64, 160, 384], + 'strides': [1, 1, 2, 2], + 'group_width': 16, + 'bottleneck_ratio': 1, + 'se_ratio': 0.25, + } + return RegNet(cfg) + + +def test(): + net = RegNetX_200MF() + print(net) + x = torch.randn(2, 3, 32, 32) + y = net(x) + print(y.shape) + + +if __name__ == '__main__': + test() diff --git a/models/Old/resnet.py b/models/Old/resnet.py new file mode 100644 index 0000000..b77694c --- /dev/null +++ b/models/Old/resnet.py @@ -0,0 +1,132 @@ +'''ResNet in PyTorch. + +For Pre-activation ResNet, see 'preact_resnet.py'. + +Reference: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385 +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, stride=1): + super(BasicBlock, self).__init__() + self.conv1 = nn.Conv2d( + in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, + stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion*planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.bn2(self.conv2(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_planes, planes, stride=1): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, + stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, self.expansion * + planes, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(self.expansion*planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion*planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class ResNet(nn.Module): + def __init__(self, block, num_blocks, num_classes=10): + 
super(ResNet, self).__init__() + self.in_planes = 64 + + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, + stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) + self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) + self.linear = nn.Linear(512*block.expansion, num_classes) + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def ResNet18(): + return ResNet(BasicBlock, [2, 2, 2, 2]) + + +def ResNet34(): + return ResNet(BasicBlock, [3, 4, 6, 3]) + + +def ResNet50(): + return ResNet(Bottleneck, [3, 4, 6, 3]) + + +def ResNet101(): + return ResNet(Bottleneck, [3, 4, 23, 3]) + + +def ResNet152(): + return ResNet(Bottleneck, [3, 8, 36, 3]) + + +def test(): + net = ResNet18() + y = net(torch.randn(1, 3, 32, 32)) + print(y.size()) + +# test() diff --git a/models/Old/resnext.py b/models/Old/resnext.py new file mode 100644 index 0000000..7a08f3e --- /dev/null +++ b/models/Old/resnext.py @@ -0,0 +1,95 @@ +'''ResNeXt in PyTorch. + +See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details. +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Block(nn.Module): + '''Grouped convolution block.''' + expansion = 2 + + def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1): + super(Block, self).__init__() + group_width = cardinality * bottleneck_width + self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(group_width) + self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False) + self.bn2 = nn.BatchNorm2d(group_width) + self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(self.expansion*group_width) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*group_width: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion*group_width) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class ResNeXt(nn.Module): + def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10): + super(ResNeXt, self).__init__() + self.cardinality = cardinality + self.bottleneck_width = bottleneck_width + self.in_planes = 64 + + self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.layer1 = self._make_layer(num_blocks[0], 1) + self.layer2 = self._make_layer(num_blocks[1], 2) + self.layer3 = self._make_layer(num_blocks[2], 2) + # self.layer4 = self._make_layer(num_blocks[3], 2) + self.linear = 
nn.Linear(cardinality*bottleneck_width*8, num_classes)
+
+    def _make_layer(self, num_blocks, stride):
+        strides = [stride] + [1]*(num_blocks-1)
+        layers = []
+        for stride in strides:
+            layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
+            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
+        # Increase bottleneck_width by 2 after each stage.
+        self.bottleneck_width *= 2
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        # out = self.layer4(out)
+        out = F.avg_pool2d(out, 8)
+        out = out.view(out.size(0), -1)
+        out = self.linear(out)
+        return out
+
+
+def ResNeXt29_2x64d():
+    return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64)
+
+def ResNeXt29_4x64d():
+    return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64)
+
+def ResNeXt29_8x64d():
+    return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64)
+
+def ResNeXt29_32x4d():
+    return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4)
+
+def test_resnext():
+    net = ResNeXt29_2x64d()
+    x = torch.randn(1,3,32,32)
+    y = net(x)
+    print(y.size())
+
+# test_resnext()
diff --git a/models/Old/sconv2davg.py b/models/Old/sconv2davg.py
new file mode 100644
index 0000000..770dba5
--- /dev/null
+++ b/models/Old/sconv2davg.py
@@ -0,0 +1,140 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import math
+
+class SConv2dAvg(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, ceil_mode=True):
+        super(SConv2dAvg, self).__init__()
+        conv = nn.Conv2d(in_channels, out_channels, kernel_size)
+        self.deconv = nn.ConvTranspose2d(1, 1, kernel_size, 1, padding=0, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
+        nn.init.constant_(self.deconv.weight, 1)
+        self.pooldeconv = nn.ConvTranspose2d(1, 1, kernel_size=stride, padding=0, stride=stride, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
+        nn.init.constant_(self.pooldeconv.weight, 1)
+        self.weight = nn.Parameter(conv.weight)
+        self.bias = nn.Parameter(conv.bias)
+        self.stride = stride
+        self.dilation = dilation
+        self.padding = padding
+        self.kernel_size = kernel_size
+        self.ceil_mode = ceil_mode
+
+    def forward(self, input, selh=-torch.ones(1,1), selw=-torch.ones(1,1), mask=-torch.ones(1,1), stoch=False, stride=-1):
+        device = input.device
+        if stride==-1:
+            stride = self.stride
+        #stoch=True
+        if stoch==False:
+            stride = 1 #test with real average pooling
+        batch_size, in_channels, in_h, in_w = input.shape
+        out_channels, in_channels, kh, kw = self.weight.shape
+
+        afterconv_h = in_h-(kh-1) #size after conv
+        afterconv_w = in_w-(kw-1)
+        if self.ceil_mode:
+            out_h = math.ceil(afterconv_h/stride)
+            out_w = math.ceil(afterconv_w/stride)
+        else:
+            out_h = math.floor(afterconv_h/stride)
+            out_w = math.floor(afterconv_w/stride)
+        unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=self.dilation, padding=self.padding, stride=1)
+        inp_unf = unfold(input)
+        if stride!=1:
+            inp_unf = inp_unf.view(batch_size,in_channels*kh*kw,afterconv_h,afterconv_w)
+            if selh[0,0]==-1:
+                resth = (out_h*stride)-afterconv_h
+                restw = (out_w*stride)-afterconv_w
+                selh = torch.randint(stride,(out_h,out_w), device=device)
+                selw = torch.randint(stride,(out_h,out_w), device=device)
+                # print(selh.shape)
+                if resth!=0:
+                    # what about the case where (stride-resth)==0 ?
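+                    # Worked example of the clamp below: with afterconv_h=7 and
+                    # stride=2, ceil_mode gives out_h=4 and resth=4*2-7=1, so the
+                    # last pooling row only covers stride-resth=1 valid input row;
+                    # taking the offsets modulo (stride-resth) forces them to 0
+                    # and keeps the rng_h/rng_w gather inside the feature map.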
+                    selh[-1,:]=selh[-1,:]%(stride-resth);selh[:,-1]=selh[:,-1]%(stride-restw)
+                    selw[-1,:]=selw[-1,:]%(stride-resth);selw[:,-1]=selw[:,-1]%(stride-restw)
+            rng_h = selh + torch.arange(0,out_h*stride,stride,device=device).view(-1,1)
+            rng_w = selw + torch.arange(0,out_w*stride,stride,device=device)
+
+            if mask[0,0]==-1:
+                inp_unf = inp_unf[:,:,rng_h,rng_w].view(batch_size,in_channels*kh*kw,-1)
+            else:
+                inp_unf = inp_unf[:,:,rng_h[mask>0],rng_w[mask>0]]
+
+        #Matrix multiplication
+        if self.bias is None:
+            out_unf = inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2)
+        else:
+            out_unf = (inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()) + self.bias).transpose(1, 2)
+
+        if stride==1 or mask[0,0]==-1:
+            out = out_unf.view(batch_size,out_channels,out_h,out_w) #Fold
+            if stoch==False:
+                out = F.avg_pool2d(out,self.stride,ceil_mode=True)
+        else:
+            out = torch.zeros(batch_size, out_channels,out_h,out_w,device=device)
+            out[:,:,mask>0] = out_unf
+        return out
+
+    def comp(self,h,w,mask=-torch.ones(1,1)):
+        out_h = (h-(self.kernel_size))/self.stride
+        out_w = (w-(self.kernel_size))/self.stride
+        if self.ceil_mode:
+            out_h = math.ceil(out_h)
+            out_w = math.ceil(out_w)
+        else:
+            out_h = math.floor(out_h)
+            out_w = math.floor(out_w)
+        if mask[0,0]==-1:
+            comp = self.weight.numel()*out_h*out_w
+        else:
+            comp = self.weight.numel()*(mask>0).sum()
+        return comp
+
+    def sample(self,h,w,mask):
+        '''
+        h, w : forward input shape
+        mask : mask of output used in computation
+        '''
+        stride = self.stride
+        out_channels, in_channels, kh, kw = self.weight.shape
+        device = mask.device
+
+        afterconv_h = h-(kh-1) #why 'afterconv'?
+        afterconv_w = w-(kw-1)
+        if self.ceil_mode:
+            out_h = math.ceil(afterconv_h/stride)
+            out_w = math.ceil(afterconv_w/stride)
+        else:
+            out_h = math.floor(afterconv_h/stride)
+            out_w = math.floor(afterconv_w/stride)
+        selh = torch.randint(stride,(out_h,out_w), device=device)
+        selw = torch.randint(stride,(out_h,out_w), device=device)
+
+        resth = (out_h*stride)-afterconv_h #simply equal to stride-1, isn't it?
+        restw = (out_w*stride)-afterconv_w
+        # print('resth', resth, self.stride)
+        if resth!=0:
+            selh[-1,:]=selh[-1,:]%(stride-resth);selh[:,-1]=selh[:,-1]%(stride-restw)
+            selw[-1,:]=selw[-1,:]%(stride-resth);selw[:,-1]=selw[:,-1]%(stride-restw)
+        maskh = (out_h)*stride
+        maskw = (out_w)*stride
+        rng_h = selh + torch.arange(0,out_h*stride,stride,device=device).view(-1,1)
+        rng_w = selw + torch.arange(0,out_w*stride,stride,device=device)
+        # rng_w = selw + torch.arange(0,out_w*self.stride,self.stride,device=device).view(-1,1)
+        nmask = torch.zeros((maskh,maskw),device=device)
+        nmask[rng_h,rng_w] = 1
+        #rmask = mask * nmask
+        dmask = self.pooldeconv(mask.float().view(1,1,mask.shape[0],mask.shape[1]))
+        rmask = nmask * dmask
+        #rmask = rmask[:,:,:out_h,:out_w]
+        fmask = self.deconv(rmask)
+        fmask = fmask[0,0]
+        return selh,selw,fmask.long()
+
+    def get_size(self,h,w):
+        # newh=(h-(self.kernel_size-1)+(self.stride-1))/self.stride
+        # neww=(w-(self.kernel_size-1)+(self.stride-1))/self.stride
+        # print(newh,neww)
+        newh=math.floor(((h + 2*self.padding - self.dilation*(self.kernel_size-1) - 1)/self.stride) + 1)
+        neww=math.floor(((w + 2*self.padding - self.dilation*(self.kernel_size-1) - 1)/self.stride) + 1)
+        return newh, neww
diff --git a/models/Old/senet.py b/models/Old/senet.py
new file mode 100644
index 0000000..98bfa0c
--- /dev/null
+++ b/models/Old/senet.py
@@ -0,0 +1,121 @@
+'''SENet in PyTorch.
+
+SENet is the winner of ImageNet-2017.
The paper is not released yet. +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class BasicBlock(nn.Module): + def __init__(self, in_planes, planes, stride=1): + super(BasicBlock, self).__init__() + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes) + ) + + # SE layers + self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) # Use nn.Conv2d instead of nn.Linear + self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.bn2(self.conv2(out)) + + # Squeeze + w = F.avg_pool2d(out, out.size(2)) + w = F.relu(self.fc1(w)) + w = F.sigmoid(self.fc2(w)) + # Excitation + out = out * w # New broadcasting feature from v0.2! + + out += self.shortcut(x) + out = F.relu(out) + return out + + +class PreActBlock(nn.Module): + def __init__(self, in_planes, planes, stride=1): + super(PreActBlock, self).__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + + if stride != 1 or in_planes != planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False) + ) + + # SE layers + self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) + self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1) + + def forward(self, x): + out = F.relu(self.bn1(x)) + shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x + out = self.conv1(out) + out = self.conv2(F.relu(self.bn2(out))) + + # Squeeze + w = F.avg_pool2d(out, out.size(2)) + w = F.relu(self.fc1(w)) + w = F.sigmoid(self.fc2(w)) + # Excitation + out = out * w + + out += shortcut + return out + + +class SENet(nn.Module): + def __init__(self, block, num_blocks, num_classes=10): + super(SENet, self).__init__() + self.in_planes = 64 + + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) + self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) + self.linear = nn.Linear(512, num_classes) + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def SENet18(): + return SENet(PreActBlock, [2,2,2,2]) + + +def test(): + net = SENet18() + y = net(torch.randn(1,3,32,32)) + print(y.size()) + +# test() diff --git a/models/Old/shufflenet.py b/models/Old/shufflenet.py 
new file mode 100644 index 0000000..acff6f7 --- /dev/null +++ b/models/Old/shufflenet.py @@ -0,0 +1,109 @@ +'''ShuffleNet in PyTorch. + +See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details. +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ShuffleBlock(nn.Module): + def __init__(self, groups): + super(ShuffleBlock, self).__init__() + self.groups = groups + + def forward(self, x): + '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]''' + N,C,H,W = x.size() + g = self.groups + return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).reshape(N,C,H,W) + + +class Bottleneck(nn.Module): + def __init__(self, in_planes, out_planes, stride, groups): + super(Bottleneck, self).__init__() + self.stride = stride + + mid_planes = out_planes/4 + g = 1 if in_planes==24 else groups + self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False) + self.bn1 = nn.BatchNorm2d(mid_planes) + self.shuffle1 = ShuffleBlock(groups=g) + self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False) + self.bn2 = nn.BatchNorm2d(mid_planes) + self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False) + self.bn3 = nn.BatchNorm2d(out_planes) + + self.shortcut = nn.Sequential() + if stride == 2: + self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1)) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.shuffle1(out) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + res = self.shortcut(x) + out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res) + return out + + +class ShuffleNet(nn.Module): + def __init__(self, cfg): + super(ShuffleNet, self).__init__() + out_planes = cfg['out_planes'] + num_blocks = cfg['num_blocks'] + groups = cfg['groups'] + + self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(24) + self.in_planes = 24 + self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups) + self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups) + self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups) + self.linear = nn.Linear(out_planes[2], 10) + + def _make_layer(self, out_planes, num_blocks, groups): + layers = [] + for i in range(num_blocks): + stride = 2 if i == 0 else 1 + cat_planes = self.in_planes if i == 0 else 0 + layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups)) + self.in_planes = out_planes + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def ShuffleNetG2(): + cfg = { + 'out_planes': [200,400,800], + 'num_blocks': [4,8,4], + 'groups': 2 + } + return ShuffleNet(cfg) + +def ShuffleNetG3(): + cfg = { + 'out_planes': [240,480,960], + 'num_blocks': [4,8,4], + 'groups': 3 + } + return ShuffleNet(cfg) + + +def test(): + net = ShuffleNetG2() + x = torch.randn(1,3,32,32) + y = net(x) + print(y) + +# test() diff --git a/models/Old/shufflenetv2.py b/models/Old/shufflenetv2.py new file mode 100644 index 0000000..eefcda3 --- /dev/null +++ b/models/Old/shufflenetv2.py @@ -0,0 +1,162 @@ +'''ShuffleNetV2 in PyTorch. 
+ +See the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" for more details. +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ShuffleBlock(nn.Module): + def __init__(self, groups=2): + super(ShuffleBlock, self).__init__() + self.groups = groups + + def forward(self, x): + '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]''' + N, C, H, W = x.size() + g = self.groups + return x.view(N, g, C//g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W) + + +class SplitBlock(nn.Module): + def __init__(self, ratio): + super(SplitBlock, self).__init__() + self.ratio = ratio + + def forward(self, x): + c = int(x.size(1) * self.ratio) + return x[:, :c, :, :], x[:, c:, :, :] + + +class BasicBlock(nn.Module): + def __init__(self, in_channels, split_ratio=0.5): + super(BasicBlock, self).__init__() + self.split = SplitBlock(split_ratio) + in_channels = int(in_channels * split_ratio) + self.conv1 = nn.Conv2d(in_channels, in_channels, + kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(in_channels) + self.conv2 = nn.Conv2d(in_channels, in_channels, + kernel_size=3, stride=1, padding=1, groups=in_channels, bias=False) + self.bn2 = nn.BatchNorm2d(in_channels) + self.conv3 = nn.Conv2d(in_channels, in_channels, + kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(in_channels) + self.shuffle = ShuffleBlock() + + def forward(self, x): + x1, x2 = self.split(x) + out = F.relu(self.bn1(self.conv1(x2))) + out = self.bn2(self.conv2(out)) + out = F.relu(self.bn3(self.conv3(out))) + out = torch.cat([x1, out], 1) + out = self.shuffle(out) + return out + + +class DownBlock(nn.Module): + def __init__(self, in_channels, out_channels): + super(DownBlock, self).__init__() + mid_channels = out_channels // 2 + # left + self.conv1 = nn.Conv2d(in_channels, in_channels, + kernel_size=3, stride=2, padding=1, groups=in_channels, bias=False) + self.bn1 = nn.BatchNorm2d(in_channels) + self.conv2 = nn.Conv2d(in_channels, mid_channels, + kernel_size=1, bias=False) + self.bn2 = nn.BatchNorm2d(mid_channels) + # right + self.conv3 = nn.Conv2d(in_channels, mid_channels, + kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(mid_channels) + self.conv4 = nn.Conv2d(mid_channels, mid_channels, + kernel_size=3, stride=2, padding=1, groups=mid_channels, bias=False) + self.bn4 = nn.BatchNorm2d(mid_channels) + self.conv5 = nn.Conv2d(mid_channels, mid_channels, + kernel_size=1, bias=False) + self.bn5 = nn.BatchNorm2d(mid_channels) + + self.shuffle = ShuffleBlock() + + def forward(self, x): + # left + out1 = self.bn1(self.conv1(x)) + out1 = F.relu(self.bn2(self.conv2(out1))) + # right + out2 = F.relu(self.bn3(self.conv3(x))) + out2 = self.bn4(self.conv4(out2)) + out2 = F.relu(self.bn5(self.conv5(out2))) + # concat + out = torch.cat([out1, out2], 1) + out = self.shuffle(out) + return out + + +class ShuffleNetV2(nn.Module): + def __init__(self, net_size): + super(ShuffleNetV2, self).__init__() + out_channels = configs[net_size]['out_channels'] + num_blocks = configs[net_size]['num_blocks'] + + self.conv1 = nn.Conv2d(3, 24, kernel_size=3, + stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(24) + self.in_channels = 24 + self.layer1 = self._make_layer(out_channels[0], num_blocks[0]) + self.layer2 = self._make_layer(out_channels[1], num_blocks[1]) + self.layer3 = self._make_layer(out_channels[2], num_blocks[2]) + self.conv2 = nn.Conv2d(out_channels[2], out_channels[3], + kernel_size=1, stride=1, padding=0, bias=False) + self.bn2 = 
nn.BatchNorm2d(out_channels[3]) + self.linear = nn.Linear(out_channels[3], 10) + + def _make_layer(self, out_channels, num_blocks): + layers = [DownBlock(self.in_channels, out_channels)] + for i in range(num_blocks): + layers.append(BasicBlock(out_channels)) + self.in_channels = out_channels + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + # out = F.max_pool2d(out, 3, stride=2, padding=1) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = F.relu(self.bn2(self.conv2(out))) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +configs = { + 0.5: { + 'out_channels': (48, 96, 192, 1024), + 'num_blocks': (3, 7, 3) + }, + + 1: { + 'out_channels': (116, 232, 464, 1024), + 'num_blocks': (3, 7, 3) + }, + 1.5: { + 'out_channels': (176, 352, 704, 1024), + 'num_blocks': (3, 7, 3) + }, + 2: { + 'out_channels': (224, 488, 976, 2048), + 'num_blocks': (3, 7, 3) + } +} + + +def test(): + net = ShuffleNetV2(net_size=0.5) + x = torch.randn(3, 3, 32, 32) + y = net(x) + print(y.shape) + + +# test() diff --git a/models/Old/vgg.py b/models/Old/vgg.py new file mode 100644 index 0000000..08347ff --- /dev/null +++ b/models/Old/vgg.py @@ -0,0 +1,47 @@ +'''VGG11/13/16/19 in Pytorch.''' +import torch +import torch.nn as nn + + +cfg = { + 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], + 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], +} + + +class VGG(nn.Module): + def __init__(self, vgg_name): + super(VGG, self).__init__() + self.features = self._make_layers(cfg[vgg_name]) + self.classifier = nn.Linear(512, 10) + + def forward(self, x): + out = self.features(x) + out = out.view(out.size(0), -1) + out = self.classifier(out) + return out + + def _make_layers(self, cfg): + layers = [] + in_channels = 3 + for x in cfg: + if x == 'M': + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + else: + layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1), + nn.BatchNorm2d(x), + nn.ReLU(inplace=True)] + in_channels = x + layers += [nn.AvgPool2d(kernel_size=1, stride=1)] + return nn.Sequential(*layers) + + +def test(): + net = VGG('VGG11') + x = torch.randn(2,3,32,32) + y = net(x) + print(y.size()) + +# test() diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..5c2679e --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,2 @@ +from .mylenet4 import * +from .myresnet3 import * diff --git a/models/mylenet4.py b/models/mylenet4.py new file mode 100644 index 0000000..b2364c5 --- /dev/null +++ b/models/mylenet4.py @@ -0,0 +1,314 @@ +'''LeNet in PyTorch.''' +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .stoch import SConv2dAvg +from .stochsim import savg_pool2d + +class MyLeNetNormal(nn.Module):#epoch 12s + def __init__(self): + super(MyLeNetNormal, self).__init__() + self.conv1 = nn.Conv2d(3, 200, 3, stride=1) + self.conv2 = nn.Conv2d(200, 400, 3, stride=1) + self.conv3 = nn.Conv2d(400, 800, 3, stride=1) + self.conv4 = nn.Conv2d(800, 10, 3, stride=1) + #self.fc1 = nn.Linear(800, 10) + + def forward(self, x, stoch=True): + + out = F.relu(self.conv1(x)) + out = F.avg_pool2d(out,2,ceil_mode=True) + out = F.relu(self.conv2(out)) + out = 
F.avg_pool2d(out,2,ceil_mode=True)
+        out = F.relu(self.conv3(out))
+        out = F.avg_pool2d(out,2,ceil_mode=True)
+        out = self.conv4(out)
+        #out = F.avg_pool2d(out,2,ceil_mode=True)
+
+        out = out.view(out.size(0), -1 )
+        #out = (self.fc1(out))
+        return out
+
+class MyLeNetSimNormal(nn.Module):#epoch 12s
+    def __init__(self):
+        super(MyLeNetSimNormal, self).__init__()
+        self.conv1 = nn.Conv2d(3, 200, 3, stride=1)
+        self.conv2 = nn.Conv2d(200, 400, 3, stride=1)
+        self.conv3 = nn.Conv2d(400, 800, 3, stride=1)
+        self.conv4 = nn.Conv2d(800, 10, 3, stride=1)
+        #self.fc1 = nn.Linear(800, 10)
+
+    def forward(self, x, stoch=True):
+
+        out = F.relu(self.conv1(x))
+        # out = self.savg_pool2d(out,2,ceil_mode=True)
+        out = savg_pool2d(out,2, mode='s', ceil_mode=True)
+        out = F.relu(self.conv2(out))
+        # out = self.savg_pool2d(out,2,ceil_mode=True)
+        out = savg_pool2d(out,2, mode='s', ceil_mode=True)
+        out = F.relu(self.conv3(out))
+        # out = self.savg_pool2d(out,2,ceil_mode=True)
+        out = savg_pool2d(out,2, mode='s', ceil_mode=True)
+        out = self.conv4(out)
+        #out = F.avg_pool2d(out,2,ceil_mode=True)
+
+        out = out.view(out.size(0), -1 )
+        #out = (self.fc1(out))
+        return out
+
+
+class MyLeNetStride(nn.Module):#epoch 6s
+    def __init__(self):
+        super(MyLeNetStride, self).__init__()
+        self.conv1 = nn.Conv2d(3, 200, 3, stride=2)
+        self.conv2 = nn.Conv2d(200, 400, 3, stride=2)
+        self.conv3 = nn.Conv2d(400, 800, 3, stride=2)
+        self.conv4 = nn.Conv2d(800, 10, 3, stride=1)
+        #self.fc1 = nn.Linear(800, 10)
+
+    def forward(self, x, stoch=True):
+
+        out = F.relu(self.conv1(x))
+        out = F.relu(self.conv2(out))
+        out = F.relu(self.conv3(out))
+        out = self.conv4(out)
+
+        out = out.view(out.size(0), -1 )
+        #out = (self.fc1(out))
+        return out
+
+class MyLeNetMatNormal(nn.Module):#epoch 21s
+    def __init__(self):
+        super(MyLeNetMatNormal, self).__init__()
+        self.conv1 = SConv2dAvg(3, 200, 3, stride=1)
+        self.conv2 = SConv2dAvg(200, 400, 3, stride=1)
+        self.conv3 = SConv2dAvg(400, 800, 3, stride=1)
+        self.conv4 = SConv2dAvg(800, 10, 3, stride=1)
+        #self.fc1 = nn.Linear(800, 10)
+
+    def forward(self, x, stoch=True):
+        out = F.relu(self.conv1(x))
+        out = F.avg_pool2d(out,2,ceil_mode=True)
+        out = F.relu(self.conv2(out))
+        out = F.avg_pool2d(out,2,ceil_mode=True)
+        out = F.relu(self.conv3(out))
+        out = F.avg_pool2d(out,2,ceil_mode=True)
+        out = (self.conv4(out))
+        #out = F.avg_pool2d(out,1,ceil_mode=True)
+
+        out = out.view(out.size(0), -1 )
+        #out = (self.fc1(out))
+        return out
+
+class MyLeNetMatStoch(nn.Module):#epoch 17s
+    def __init__(self):
+        super(MyLeNetMatStoch, self).__init__()
+        self.conv1 = SConv2dAvg(3, 200, 3, stride=2)
+        self.conv2 = SConv2dAvg(200, 400, 3, stride=2)
+        self.conv3 = SConv2dAvg(400, 800, 3, stride=2)
+        self.conv4 = SConv2dAvg(800, 10, 3, stride=1)
+        #self.fc1 = nn.Linear(800, 10)
+
+    def forward(self, x, stoch=True):
+        out = F.relu(self.conv1(x,stoch=stoch))
+        out = F.relu(self.conv2(out,stoch=stoch))
+        out = F.relu(self.conv3(out,stoch=stoch))
+        out = self.conv4(out,stoch=stoch)
+        out = out.view(out.size(0), -1 )
+        #out = self.fc1(out)
+        return out
+
+class MyLeNetMatStochBU(nn.Module):#epoch 11s
+    def __init__(self):
+        super(MyLeNetMatStochBU, self).__init__()
+        self.conv1 = SConv2dAvg(3, 200, 3, stride=2)
+        self.conv2 = SConv2dAvg(200, 400, 3, stride=2)
+        self.conv3 = SConv2dAvg(400, 800, 3, stride=2, ceil_mode=True)
+        self.conv4 = SConv2dAvg(800, 10, 3, stride=1)
+        # self.fc1 = nn.Linear(800, 10)
+
+    def forward(self, x, stoch=True):
+        #get sizes
+        h0,w0 = x.shape[2],x.shape[3]
+        h1,w1 = self.conv1.get_size(h0,w0)
+        h2,w2 = self.conv2.get_size(h1,w1)
+        h3,w3 = self.conv3.get_size(h2,w2)
+
+        #sample bottom-up: each layer's mask determines which locations the
+        #layer below actually has to compute
+        mask3 = torch.ones(h3,w3).to(x.device)
+        selh3,selw3,mask2 = self.conv3.sample(h2,w2,mask=mask3)
+        selh2,selw2,mask1 = self.conv2.sample(h1,w1,mask=mask2)
+        selh1,selw1,mask0 = self.conv1.sample(h0,w0,mask=mask1)
+        #forward
+        out = F.relu(self.conv1(x,selh1,selw1,mask1,stoch=stoch))
+        out = F.relu(self.conv2(out,selh2,selw2,mask2,stoch=stoch))
+        out = F.relu(self.conv3(out,selh3,selw3,mask3,stoch=stoch))
+
+        out = self.conv4(out,stoch=stoch)
+        out = out.view(out.size(0), -1 )
+        # out = (self.fc1(out))
+        return out
+
+# class SConv2dAvg(nn.Module):
+#     def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1):
+#         super(SConv2dAvg, self).__init__()
+#         conv = nn.Conv2d(in_channels, out_channels, kernel_size)
+#         self.deconv = nn.ConvTranspose2d(1, 1, kernel_size, 1, padding=0, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
+#         nn.init.constant_(self.deconv.weight, 1)
+#         self.pooldeconv = nn.ConvTranspose2d(1, 1, kernel_size=stride,padding=0,stride=stride, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
+#         nn.init.constant_(self.pooldeconv.weight, 1)
+#         self.weight = nn.Parameter(conv.weight)
+#         self.bias = nn.Parameter(conv.bias)
+#         self.stride = stride
+#         self.dilation = dilation
+#         self.padding = padding
+#         self.kernel_size = kernel_size
+
+#     def forward(self, input, selh=-torch.ones(1,1), selw=-torch.ones(1,1), mask=-torch.ones(1,1),stoch=True):
+#         stride = self.stride
+#         if stoch==False:
+#             stride=1
+#         batch_size, in_channels, in_h, in_w = input.shape
+#         out_channels, in_channels, kh, kw = self.weight.shape
+#         afterconv_h = in_h-(kh-1)
+#         afterconv_w = in_w-(kw-1)
+#         out_h = int((afterconv_h+stride-1)/stride)
+#         out_w = int((afterconv_w+stride-1)/stride)
+
+#         unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=self.dilation, padding=self.padding, stride=1)
+#         inp_unf = unfold(input)
+#         if stride!=1:
+#             inp_unf = inp_unf.view(batch_size,in_channels*kh*kw,afterconv_h,afterconv_w)
+#             if selh[0,0]==-1:
+#                 resth = (out_h*stride)-afterconv_h
+#                 restw = (out_w*stride)-afterconv_w
+#                 selh = torch.cuda.LongTensor(out_h,out_w).random_(0, stride)
+#                 selw = torch.cuda.LongTensor(out_h,out_w).random_(0, stride)
+#                 #if resth!=0:
+#                 #    selh[-1,:]=selh[-1,:]%(stride-resth);selh[:,-1]=selh[:,-1]%(stride-restw)
+#                 #    selw[-1,:]=selw[-1,:]%(stride-resth);selw[:,-1]=selw[:,-1]%(stride-restw)
+#                 #if mask[0,0]==-1
+#                 #    mask = torch.ones(out_h,out_w,device=torch.device('cuda'))
+#             rng_h = selh + torch.arange(0,out_h*stride,stride,device=torch.device('cuda')).view(-1,1)
+#             rng_w = selw + torch.arange(0,out_w*stride,stride,device=torch.device('cuda'))
+#             if mask[0,0]==-1:
+#                 inp_unf = inp_unf[:,:,rng_h,rng_w].view(batch_size,in_channels*kh*kw,-1)
+#             else:
+#                 inp_unf = inp_unf[:,:,rng_h[mask>0],rng_w[mask>0]]
+
+#         if self.bias is None:
+#             out_unf = inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2)
+#         else:
+#             out_unf = (inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()) + self.bias).transpose(1, 2)
+
+#         if stride==1 or mask[0,0]==-1:
+#             out = out_unf.view(batch_size,out_channels,out_h,out_w)
+# if stoch==False: +# out = F.avg_pool2d(out,self.stride,ceil_mode=True) +# else: +# out = torch.zeros(batch_size, out_channels,out_h,out_w,device=torch.device('cuda')) +# out[:,:,mask>0] = out_unf +# return out + +# def forward_(self, input, selh=-torch.ones(1,1), selw=-torch.ones(1,1), mask=-torch.ones(1,1),stoch=True): +# stride = self.stride +# if stoch==False: +# stride=1 +# batch_size, in_channels, in_h, in_w = input.shape +# out_channels, in_channels, kh, kw = self.weight.shape +# afterconv_h = in_h-(kh-1) +# afterconv_w = in_w-(kw-1) +# out_h = (afterconv_h+stride-1)/stride +# out_w = (afterconv_w+stride-1)/stride + +# unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=self.dilation, padding=self.padding, stride=1) +# inp_unf = unfold(input) + +# if self.bias is None: +# out_unf = inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2) +# else: +# out_unf = (inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()) + self.bias).transpose(1, 2) + +# out = out_unf.view(batch_size,out_channels,afterconv_h,afterconv_w) +# if stoch==False: +# out = F.avg_pool2d(out,self.stride,ceil_mode=True) +# return out + +# def sample(self,h,w,mask): +# out_channels, in_channels, kh, kw = self.weight.shape +# afterconv_h = h-(kh-1) +# afterconv_w = w-(kw-1) +# out_h = (afterconv_h+self.stride-1)/self.stride +# out_w = (afterconv_w+self.stride-1)/self.stride +# selh = torch.cuda.LongTensor(out_h,out_w).random_(0, self.stride) +# selw = torch.cuda.LongTensor(out_h,out_w).random_(0, self.stride) +# resth = (out_h*self.stride)-afterconv_h +# restw = (out_w*self.stride)-afterconv_w +# #print(resth) +# #if resth!=0: +# # selh[-1,:]=selh[-1,:]%(self.stride-resth);selh[:,-1]=selh[:,-1]%(self.stride-restw) +# # selw[-1,:]=selw[-1,:]%(self.stride-resth);selw[:,-1]=selw[:,-1]%(self.stride-restw) +# maskh = (out_h)*self.stride#-resth#+self.kernel_size-1 +# maskw = (out_w)*self.stride#-restw#+self.kernel_size-1 +# rng_h = selh + torch.arange(0,out_h*self.stride,self.stride,device=torch.device('cuda')).view(-1,1) +# rng_w = selw + torch.arange(0,out_w*self.stride,self.stride,device=torch.device('cuda')) +# nmask = torch.zeros((maskh,maskw),device=torch.device('cuda')) +# nmask[rng_h,rng_w] = 1 +# #rmask = mask * nmask +# dmask = self.pooldeconv(mask.float().view(1,1,mask.shape[0],mask.shape[1])) +# rmask = nmask * dmask +# #rmask = rmask[:,:,:out_h,:out_w] +# fmask = self.deconv(rmask) +# fmask = fmask[0,0] +# return selh,selw,fmask.long() + +# def get_size(self,h,w): +# newh=(h-(self.kernel_size-1)+(self.stride-1))/self.stride +# neww=(w-(self.kernel_size-1)+(self.stride-1))/self.stride +# return newh,neww + + +# def savg_pool2d(x,size,ceil_mode=False): +# b,c,h,w = x.shape +# selh = torch.LongTensor(h/size,w/size).random_(0, size) +# rngh = torch.arange(0,h,size).long().view(h/size,1).repeat(1,w/size).view(h/size,w/size) +# selx = (selh+rngh).repeat(b,c,1,1) + +# selw = torch.LongTensor(h/size,w/size).random_(0, size) +# rngw = torch.arange(0,w,size).long().view(1,h/size).repeat(h/size,1).view(h/size,w/size) +# sely = (selw+rngw).repeat(b,c,1,1) +# bv, cv ,hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c),torch.arange(0,h/size),torch.arange(0,w/size)]) +# #x=x.view(b,c,h*w) +# newx = x[bv,cv, selx, sely] +# #ghdh +# return newx + +# def savg_pool2d_(x,size,ceil_mode=False): +# b,c,h,w = x.shape +# selh = torch.cuda.LongTensor(h/size,w/size).random_(0, size) +# rngh = torch.arange(0,h,size,device=torch.device('cuda')).view(-1,1) +# selx 
= selh+rngh
+
+#     selw = torch.cuda.LongTensor(h/size,w/size).random_(0, size)
+#     rngw = torch.arange(0,w,size,device=torch.device('cuda'))
+#     sely = selw+rngw
+
+#     #bv, cv ,hv, wv = torch.meshgrid([torch.arange(0,b), torch.arange(0,c),torch.arange(0,h/size),torch.arange(0,w/size)])
+#     #x=x.view(b,c,h*w)
+#     newx = x[:,:, selx, sely]
+#     return newx
diff --git a/models/myresnet3.py b/models/myresnet3.py
new file mode 100644
index 0000000..1c56b8f
--- /dev/null
+++ b/models/myresnet3.py
@@ -0,0 +1,167 @@
+'''ResNet in PyTorch.
+
+For Pre-activation ResNet, see 'preact_resnet.py'.
+
+Reference:
+[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+    Deep Residual Learning for Image Recognition. arXiv:1512.03385
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .stoch import SConv2dAvg
+
+class BasicBlock(nn.Module):
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1, stoch=False):
+        super(BasicBlock, self).__init__()
+        self.conv1 = nn.Conv2d(
+            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+
+        self.stoch = stoch
+        if stoch:
+            self.conv2 = SConv2dAvg(planes, planes, kernel_size=3,
+                                    stride=1, padding=1) #bias=False) #Bias !?
+        else:
+            self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                                   stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+
+        self.shortcut = nn.Sequential()
+        if stride != 1 or in_planes != self.expansion*planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, self.expansion*planes,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(self.expansion*planes)
+            )
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        if self.stoch:
+            _,_,h1,w1 = out.shape
+            h2,w2 = self.conv2.get_size(h1,w1)
+            mask2 = torch.ones((h2,w2), device=x.device)
+            selh2,selw2,mask1 = self.conv2.sample(h1,w1,mask=mask2)
+            out = self.bn2(self.conv2(out,selh2,selw2,mask2,stoch=self.stoch))
+        else:
+            out = self.bn2(self.conv2(out))
+        out += self.shortcut(x)
+        out = F.relu(out)
+        return out
+
+
+class Bottleneck(nn.Module):
+    expansion = 4
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(Bottleneck, self).__init__()
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.conv3 = nn.Conv2d(planes, self.expansion *
+                               planes, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(self.expansion*planes)
+
+        self.shortcut = nn.Sequential()
+        if stride != 1 or in_planes != self.expansion*planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, self.expansion*planes,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(self.expansion*planes)
+            )
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = F.relu(self.bn2(self.conv2(out)))
+        out = self.bn3(self.conv3(out))
+        out += self.shortcut(x)
+        out = F.relu(out)
+        return out
+
+
+class ResNet(nn.Module):
+    def __init__(self, block, num_blocks, num_classes=10, stoch=False):
+        super(ResNet, self).__init__()
+        self.in_planes = 64
+
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
+        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, stoch=stoch)
+        self.linear = nn.Linear(512*block.expansion, num_classes)
+        self.stoch = stoch
+
+        # if self.stoch:
+        #     old_conv = self.layer4[-1].conv2
+        #     self.layer4[-1].conv2=SConv2dAvg(old_conv.weight.shape[0],
+        #                                      old_conv.weight.shape[1],
+        #                                      old_conv.kernel_size,
+        #                                      stride=4)#old_conv.stride[0]) #Bias !?
+
+    def _make_layer(self, block, planes, num_blocks, stride, stoch=False):
+        strides = [stride] + [1]*(num_blocks-1)
+        layers = []
+        for stride in strides:
+            layers.append(block(self.in_planes, planes, stride))
+            self.in_planes = planes * block.expansion
+        if stoch:
+            layers[-1] = block(self.in_planes, planes, stride, stoch=True)
+        return nn.Sequential(*layers)
+
+    def forward(self, x, stoch=False):
+        #if self.training==False:
+        #    stoch=False
+        # self.layer1.stoch=stoch
+        # self.layer2.stoch=stoch
+        # self.layer3.stoch=stoch
+        self.layer4[-1].stoch = stoch
+
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        out = self.layer4(out)
+
+        # print(out.shape)
+        out = F.avg_pool2d(out, 4)
+        # print(out.shape)
+        out = out.view(out.size(0), -1)
+        out = self.linear(out)
+        return out
+
+
+def MyResNet18(stoch=True):
+    return ResNet(BasicBlock, [2, 2, 2, 2], stoch=stoch)
+
+
+def ResNet34():
+    return ResNet(BasicBlock, [3, 4, 6, 3])
+
+
+def MyResNet50():
+    return ResNet(Bottleneck, [3, 4, 6, 3])
+
+
+def ResNet101():
+    return ResNet(Bottleneck, [3, 4, 23, 3])
+
+
+def ResNet152():
+    return ResNet(Bottleneck, [3, 8, 36, 3])
+
+
+def test():
+    net = MyResNet18()
+    y = net(torch.randn(1, 3, 32, 32))
+    print(y.size())
+
+# test()
diff --git a/models/stoch.py b/models/stoch.py
new file mode 100644
index 0000000..34cc92a
--- /dev/null
+++ b/models/stoch.py
@@ -0,0 +1,230 @@
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import math
+
+class SConv2dStride(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, ceil_mode=True, bias=False):
+        super(SConv2dStride, self).__init__()
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, bias=bias)
+        self.stride = stride
+        self.ceil_mode = ceil_mode
+
+    def forward(self, x, stoch=True):
+        stoch = True  # for some reason the (non-stochastic) average path does not work...
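+        # What the stochastic branch below does: it draws one phase of the
+        # stride grid, e.g. for stride=2 it picks (selh, selw) in {0,1}^2 and
+        # runs the strided conv on x[:,:,selh:,selw:], so each forward pass
+        # sees one of the stride^2 shifted sub-grids whose average is the
+        # deterministic branch (stride-1 conv followed by avg_pool2d).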
diff --git a/models/stoch.py b/models/stoch.py
new file mode 100644
index 0000000..34cc92a
--- /dev/null
+++ b/models/stoch.py
@@ -0,0 +1,230 @@
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import math
+
+class SConv2dStride(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, ceil_mode=True, bias=False):
+        super(SConv2dStride, self).__init__()
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, bias=bias)
+        self.stride = stride
+        self.ceil_mode = ceil_mode
+
+    def forward(self, x, stoch=True):
+        stoch = True  # forced on: the average branch below currently misbehaves
+        if stoch:
+            # Shift the input by a random offset, then apply the strided conv;
+            # this samples one of the stride*stride phases of a stride-1 conv.
+            device = x.device
+            selh = torch.randint(self.conv.stride[0], (1,), device=device)[0]
+            selw = torch.randint(self.conv.stride[1], (1,), device=device)[0]
+            out = self.conv(x[:, :, selh:, selw:])
+        else:
+            # Deterministic reference: stride-1 conv followed by average pooling.
+            self.conv.stride = (1, 1)
+            out = self.conv(x)
+            out = F.avg_pool2d(out, self.stride, ceil_mode=self.ceil_mode)
+            self.conv.stride = (self.stride, self.stride)
+        return out
+
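SConv2dStride relies on the identity that a stride-s convolution equals a stride-1 convolution subsampled at phase (0, 0); drawing the phase uniformly makes the output an unbiased sample among the s*s phases whose average is exactly the avg_pool branch. A self-contained check of that identity (toy sizes of my choosing):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 8, 16, 16)
    w = torch.randn(8, 8, 3, 3)

    # stride-2 conv == stride-1 conv subsampled at phase (0, 0)
    assert torch.allclose(F.conv2d(x, w, stride=2), F.conv2d(x, w)[:, :, ::2, ::2])

    # SConv2dStride instead shifts the input by a random phase before the strided conv
    sh, sw = torch.randint(2, (2,)).tolist()
    y = F.conv2d(x[:, :, sh:, sw:], w, stride=2)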
+class SConv2dAvg(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, ceil_mode=True, bias=True):
+        super(SConv2dAvg, self).__init__()
+        conv = nn.Conv2d(in_channels, out_channels, kernel_size)
+        self.deconv = nn.ConvTranspose2d(1, 1, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
+        nn.init.constant_(self.deconv.weight, 1)
+        self.pooldeconv = nn.ConvTranspose2d(1, 1, kernel_size=stride, padding=0, stride=stride, output_padding=0, groups=1, bias=False, dilation=1, padding_mode='zeros')
+        nn.init.constant_(self.pooldeconv.weight, 1)
+        self.weight = nn.Parameter(conv.weight)
+        if bias:
+            self.bias = nn.Parameter(conv.bias)
+        else:
+            self.bias = None
+        self.stride = stride
+        self.dilation = dilation
+        self.padding = padding
+        self.kernel_size = kernel_size
+        self.ceil_mode = ceil_mode
+
+    # Sentinel defaults: -1 means "not provided".
+    def forward(self, input, selh=-torch.ones(1,1), selw=-torch.ones(1,1), mask=-torch.ones(1,1), stoch=True, stride=-1):
+        device = input.device
+        if stride == -1:
+            stride = self.stride  # if stride is not given, use self.stride
+        if stoch == False:
+            stride = 1  # compare against true average pooling
+        batch_size, in_channels, in_h, in_w = input.shape
+        out_channels, in_channels, kh, kw = self.weight.shape
+
+        afterconv_h = in_h+2*self.padding-(kh-1)  # size after convolution
+        afterconv_w = in_w+2*self.padding-(kw-1)
+        if self.ceil_mode:  # ceil_mode=true is the default for a strided conv
+            out_h = math.ceil(afterconv_h/stride)
+            out_w = math.ceil(afterconv_w/stride)
+        else:  # ceil_mode=false is the default for pooling
+            out_h = math.floor(afterconv_h/stride)
+            out_w = math.floor(afterconv_w/stride)
+        unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=self.dilation, padding=self.padding, stride=1)
+        inp_unf = unfold(input)  # im2col: (batch_size, in_channels*kh*kw, afterconv_h*afterconv_w)
+        if stride != 1:  # if stride==1 there is no pooling
+            inp_unf = inp_unf.view(batch_size, in_channels*kh*kw, afterconv_h, afterconv_w)
+            if selh[0,0] == -1:  # no selection given: sample one
+                # selection of where to sample for each pooling location
+                selh = torch.randint(stride, (out_h, out_w), device=device)
+                selw = torch.randint(stride, (out_h, out_w), device=device)
+
+                resth = (out_h*stride)-afterconv_h
+                restw = (out_w*stride)-afterconv_w
+                if resth != 0 and self.ceil_mode:  # with ceil_mode, restrict the last row/column to valid locations
+                    selh[-1,:] = selh[-1,:]%(stride-resth); selh[:,-1] = selh[:,-1]%(stride-restw)
+                    selw[-1,:] = selw[-1,:]%(stride-resth); selw[:,-1] = selw[:,-1]%(stride-restw)
+            # make the positions global by adding the offset of each pooling cell
+            rng_h = selh + torch.arange(0, out_h*stride, stride, device=device).view(-1,1)
+            rng_w = selw + torch.arange(0, out_w*stride, stride, device=device)
+
+            if mask[0,0] == -1:  # no mask given: use the sampled selection everywhere
+                inp_unf = inp_unf[:,:,rng_h,rng_w].view(batch_size, in_channels*kh*kw, -1)
+            else:  # valid mask given: evaluate the selection only at mask locations
+                inp_unf = inp_unf[:,:,rng_h[mask>0],rng_w[mask>0]]
+
+        # Convolution as a matrix multiplication
+        if self.bias is None:
+            out_unf = inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2)
+        else:
+            out_unf = (inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()) + self.bias).transpose(1, 2)
+
+        if stride == 1 or mask[0,0] == -1:  # no mask, or stride==1
+            out = out_unf.view(batch_size, out_channels, out_h, out_w)  # fold
+            # if stoch==False:  # done outside for clarity
+            #     out = F.avg_pool2d(out, self.stride, ceil_mode=True)
+        else:  # with a mask, scatter the results back to the masked locations
+            out = torch.zeros(batch_size, out_channels, out_h, out_w, device=device)
+            out[:,:,mask>0] = out_unf
+        return out
+
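The matrix-multiplication core above is the standard im2col formulation; with stride 1 and no mask it reproduces nn.Conv2d exactly. A quick equivalence check (my toy sizes, bias omitted):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 4, 9, 9)
    conv = torch.nn.Conv2d(4, 6, kernel_size=3, padding=1, bias=False)

    unf = F.unfold(x, kernel_size=3, padding=1)        # (1, 4*3*3, 81)
    out = conv.weight.view(6, -1) @ unf                # (1, 6, 81)
    assert torch.allclose(out.view(1, 6, 9, 9), conv(x), atol=1e-5)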
+    def forward_(self, input, selh=-torch.ones(1,1), selw=-torch.ones(1,1), mask=-torch.ones(1,1), stoch=True, stride=-1):
+        # Variant of forward() that samples a separate selection per input channel.
+        device = input.device
+        if stride == -1:
+            stride = self.stride
+        if stoch == False:
+            stride = 1  # compare against true average pooling
+        batch_size, in_channels, in_h, in_w = input.shape
+        out_channels, in_channels, kh, kw = self.weight.shape
+
+        afterconv_h = in_h+2*self.padding-(kh-1)  # size after convolution
+        afterconv_w = in_w+2*self.padding-(kw-1)
+        if self.ceil_mode:
+            out_h = math.ceil(afterconv_h/stride)
+            out_w = math.ceil(afterconv_w/stride)
+        else:
+            out_h = math.floor(afterconv_h/stride)
+            out_w = math.floor(afterconv_w/stride)
+        unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=self.dilation, padding=self.padding, stride=1)
+        inp_unf = unfold(input)
+        if stride != 1:
+            inp_unf = inp_unf.view(batch_size, in_channels, kh*kw, afterconv_h, afterconv_w)
+            if selh[0,0] == -1:
+                resth = (out_h*stride)-afterconv_h
+                restw = (out_w*stride)-afterconv_w
+                selh = torch.randint(stride, (in_channels, out_h, out_w), device=device)
+                selw = torch.randint(stride, (in_channels, out_h, out_w), device=device)
+                if resth != 0:
+                    # Edge case: (stride-resth)==0 would make this modulo fail.
+                    selh[-1,:] = selh[-1,:]%(stride-resth); selh[:,-1] = selh[:,-1]%(stride-restw)
+                    selw[-1,:] = selw[-1,:]%(stride-resth); selw[:,-1] = selw[:,-1]%(stride-restw)
+            rng_h = selh + torch.arange(0, out_h*stride, stride, device=device).view(1,-1,1)
+            rng_w = selw + torch.arange(0, out_w*stride, stride, device=device).view(1,1,-1)
+            selc = torch.arange(0, in_channels, device=device).view(in_channels,1,1).repeat(1, out_h, out_w)
+
+            if mask[0,0] == -1:
+                inp_unf = inp_unf.transpose(1,2)[:,:,selc,rng_h,rng_w].transpose(2,1).reshape(batch_size, in_channels*kh*kw, -1)
+            else:
+                inp_unf = inp_unf[:,:,rng_h[mask>0],rng_w[mask>0]]
+
+        # Convolution as a matrix multiplication
+        if self.bias is None:
+            out_unf = inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2)
+        else:
+            out_unf = (inp_unf.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()) + self.bias).transpose(1, 2)
+
+        if stride == 1 or mask[0,0] == -1:
+            out = out_unf.view(batch_size, out_channels, out_h, out_w)  # fold
+        else:
+            out = torch.zeros(batch_size, out_channels, out_h, out_w, device=device)
+            out[:,:,mask>0] = out_unf
+        return out
+
+    def comp(self, h, w, mask=-torch.ones(1,1)):
+        # Multiply-accumulate count for an input of spatial size (h, w).
+        out_h = (h-self.kernel_size)/self.stride
+        out_w = (w-self.kernel_size)/self.stride
+        if self.ceil_mode:
+            out_h = math.ceil(out_h)
+            out_w = math.ceil(out_w)
+        else:
+            out_h = math.floor(out_h)
+            out_w = math.floor(out_w)
+        if mask[0,0] == -1:
+            comp = self.weight.numel()*out_h*out_w
+        else:
+            comp = self.weight.numel()*(mask>0).sum()
+        return comp
+
+    def sample(self, h, w, mask):
+        '''
+        h, w : spatial size of the forward input
+        mask : mask over the output locations used in the computation
+        Returns the sampled selections and the induced input-side mask.
+        '''
+        stride = self.stride
+        out_channels, in_channels, kh, kw = self.weight.shape
+        device = mask.device
+
+        afterconv_h = h-(kh-1)  # size after conv (padding not taken into account here)
+        afterconv_w = w-(kw-1)
+        if self.ceil_mode:
+            out_h = math.ceil(afterconv_h/stride)
+            out_w = math.ceil(afterconv_w/stride)
+        else:
+            out_h = math.floor(afterconv_h/stride)
+            out_w = math.floor(afterconv_w/stride)
+        # out_h = ((afterconv_h+2*self.padding-1)/stride)+1
+        # out_w = ((afterconv_w+2*self.padding-1)/stride)+1
+        assert(tuple(mask.shape) == (out_h, out_w))
+
+        selh = torch.randint(stride, (out_h, out_w), device=device)
+        selw = torch.randint(stride, (out_h, out_w), device=device)
+
+        resth = (out_h*stride)-afterconv_h  # ceil/floor remainder, 0 or 1
+        restw = (out_w*stride)-afterconv_w
+        if resth != 0:
+            selh[-1,:] = selh[-1,:]%(stride-resth); selh[:,-1] = selh[:,-1]%(stride-restw)
+            selw[-1,:] = selw[-1,:]%(stride-resth); selw[:,-1] = selw[:,-1]%(stride-restw)
+        maskh = out_h*stride
+        maskw = out_w*stride
+        rng_h = selh + torch.arange(0, out_h*stride, stride, device=device).view(-1,1)
+        rng_w = selw + torch.arange(0, out_w*stride, stride, device=device)
+        nmask = torch.zeros((maskh, maskw), device=device)
+        nmask[rng_h, rng_w] = 1
+        # Dilate the output mask back to input resolution: first undo the pooling
+        # with pooldeconv, then undo the convolution's receptive field with deconv.
+        dmask = self.pooldeconv(mask.float().view(1, 1, mask.shape[0], mask.shape[1]))
+        rmask = nmask * dmask
+        fmask = self.deconv(rmask)
+        fmask = fmask[0, 0]
+        return selh, selw, fmask.long()
+
+    def get_size(self, h, w):
+        # Standard convolution output-size formula.
+        newh = math.floor(((h + 2*self.padding - self.dilation*(self.kernel_size-1) - 1)/self.stride) + 1)
+        neww = math.floor(((w + 2*self.padding - self.dilation*(self.kernel_size-1) - 1)/self.stride) + 1)
+        return newh, neww
+
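get_size is the usual conv output-size formula, floor((h + 2p - d*(k-1) - 1)/s) + 1. A one-line sanity check against F.conv2d (arbitrary toy sizes):

    import torch
    import torch.nn.functional as F

    out = F.conv2d(torch.randn(1, 1, 17, 23), torch.randn(1, 1, 3, 3), stride=2, padding=1)
    newh = (17 + 2*1 - 1*(3-1) - 1)//2 + 1   # 9
    neww = (23 + 2*1 - 1*(3-1) - 1)//2 + 1   # 12
    assert out.shape[2:] == (newh, neww)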
diff --git a/models/stochsim.py b/models/stochsim.py
new file mode 100644
index 0000000..97de299
--- /dev/null
+++ b/models/stochsim.py
@@ -0,0 +1,147 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import math
+
+# spatial, batch and channel stochasticity
+def savg_pool2d_sbc(x, size, ceil_mode=False):
+    b, c, h, w = x.shape
+    device = x.device
+    if ceil_mode:
+        out_h = math.ceil(h/size)
+        out_w = math.ceil(w/size)
+    else:
+        out_h = math.floor(h/size)
+        out_w = math.floor(w/size)
+    selh = torch.randint(size, (b, c, out_h, out_w), device=device)
+    # selh[:] = 0
+    rngh = torch.arange(0, h, size, device=device).view(1, 1, -1, 1)
+    selh = selh+rngh
+
+    selw = torch.randint(size, (b, c, out_h, out_w), device=device)
+    # selw[:] = 0
+    rngw = torch.arange(0, w, size, device=device).view(1, 1, 1, -1)
+    selw = selw+rngw
+    selc = torch.arange(0, c, device=device).view(1, c, 1, 1).repeat(b, 1, out_h, out_w)
+    selb = torch.arange(0, b, device=device).view(b, 1, 1, 1).repeat(1, c, out_h, out_w)
+    newx = x[selb, selc, selh, selw]
+    return newx
+
+# spatial and channel stochasticity, same for the whole batch
+def savg_pool2d_sc(x, size, ceil_mode=False):
+    b, c, h, w = x.shape
+    device = x.device
+    if ceil_mode:
+        out_h = math.ceil(h/size)
+        out_w = math.ceil(w/size)
+    else:
+        out_h = math.floor(h/size)
+        out_w = math.floor(w/size)
+    selh = torch.randint(size, (c, out_h, out_w), device=device)
+    # selh[:] = 0
+    rngh = torch.arange(0, h, size, device=device).view(1, -1, 1)
+    selh = selh+rngh
+
+    selw = torch.randint(size, (c, out_h, out_w), device=device)
+    # selw[:] = 0
+    rngw = torch.arange(0, w, size, device=device).view(1, 1, -1)
+    selw = selw+rngw
+    selc = torch.arange(0, c, device=device).view(c, 1, 1).repeat(1, out_h, out_w)
+
+    newx = x[:, selc, selh, selw]
+    return newx
+
+# spatial and batch stochasticity, same for all channels
+def savg_pool2d_sb(x, size, ceil_mode=False):
+    b, c, h, w = x.shape
+    device = x.device
+    if ceil_mode:
+        out_h = math.ceil(h/size)
+        out_w = math.ceil(w/size)
+    else:
+        out_h = math.floor(h/size)
+        out_w = math.floor(w/size)
+    selh = torch.randint(size, (b, out_h, out_w), device=device)
+    # selh[:] = 0
+    rngh = torch.arange(0, h, size, device=device).view(1, -1, 1)
+    selh = selh+rngh
+
+    selw = torch.randint(size, (b, out_h, out_w), device=device)
+    # selw[:] = 0
+    rngw = torch.arange(0, w, size, device=device).view(1, 1, -1)
+    selw = selw+rngw
+    selb = torch.arange(0, b, device=device).view(b, 1, 1).repeat(1, out_h, out_w)
+
+    newx = x.transpose(1, 0)
+    newx = newx[:, selb, selh, selw]
+    return newx.transpose(1, 0)
+
+# spatial stochasticity, same for the whole batch and all channels
+def savg_pool2d_s(x, size, ceil_mode=False):
+    b, c, h, w = x.shape
+    device = x.device
+    if ceil_mode:
+        out_h = math.ceil(h/size)
+        out_w = math.ceil(w/size)
+    else:
+        out_h = math.floor(h/size)
+        out_w = math.floor(w/size)
+    selh = torch.randint(size, (out_h, out_w), device=device)
+    # selh[:] = 0
+    rngh = torch.arange(0, h, size, device=device).view(-1, 1)
+    selh = selh+rngh
+
+    selw = torch.randint(size, (out_h, out_w), device=device)
+    # selw[:] = 0
+    rngw = torch.arange(0, w, size, device=device)
+    selw = selw+rngw
+
+    newx = x[:, :, selh, selw]
+    return newx
+
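Each of these samplers keeps one activation per size-by-size window, so in expectation they reproduce average pooling; the variants only differ in how the random offsets are shared across batch and channels. A quick Monte-Carlo check for savg_pool2d_s (my sketch; assumes h and w divisible by size):

    import torch
    import torch.nn.functional as F
    from models.stochsim import savg_pool2d_s

    torch.manual_seed(0)
    x = torch.randn(2, 4, 8, 8)
    est = torch.stack([savg_pool2d_s(x, 2) for _ in range(4000)]).mean(0)
    print((est - F.avg_pool2d(x, 2)).abs().max())  # small; shrinks like 1/sqrt(draws)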
+def savg_pool2d_sdrop(x, size, ceil_mode=False, drop=0, repeat=1):
+    b, c, h, w = x.shape
+    device = x.device
+    if ceil_mode:
+        out_h = math.ceil(h/size)
+        out_w = math.ceil(w/size)
+    else:
+        out_h = math.floor(h/size)
+        out_w = math.floor(w/size)
+
+    for l in range(repeat):
+        selh = torch.randint(size, (out_h, out_w), device=device)
+        rngh = torch.arange(0, h, size, device=device).view(-1, 1)
+        selh = selh+rngh
+
+        selw = torch.randint(size, (out_h, out_w), device=device)
+        rngw = torch.arange(0, w, size, device=device)
+        selw = selw+rngw
+
+        if l == 0:
+            newx = x[:, :, selh, selw]
+        else:
+            newx = newx + x[:, :, selh, selw]
+    newx = newx/repeat
+    if drop != 0:
+        # Channel dropout: zero out channels drawn below the drop rate.
+        dropmask = torch.rand((c), device=device)
+        newx[:, dropmask < drop] = 0
+    return newx
diff --git a/utils.py b/utils.py
new file mode 100644
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,124 @@
+'''Some helper functions for PyTorch, including:
+    - get_mean_and_std: calculate the mean and std value of dataset.
+    - msr_init: net parameter initialization.
+    - progress_bar: progress bar mimic xlua.progress.
+'''
+import os
+import sys
+import time
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.init as init
+
+def get_mean_and_std(dataset):
+    '''Compute the mean and std value of dataset.'''
+    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)
+    mean = torch.zeros(3)
+    std = torch.zeros(3)
+    print('==> Computing mean and std..')
+    for inputs, targets in dataloader:
+        for i in range(3):
+            mean[i] += inputs[:,i,:,:].mean()
+            std[i] += inputs[:,i,:,:].std()
+    mean.div_(len(dataset))
+    std.div_(len(dataset))
+    return mean, std
+
+def init_params(net):
+    '''Init layer parameters.'''
+    for m in net.modules():
+        if isinstance(m, nn.Conv2d):
+            init.kaiming_normal_(m.weight, mode='fan_out')
+            if m.bias is not None:
+                init.constant_(m.bias, 0)
+        elif isinstance(m, nn.BatchNorm2d):
+            init.constant_(m.weight, 1)
+            init.constant_(m.bias, 0)
+        elif isinstance(m, nn.Linear):
+            init.normal_(m.weight, std=1e-3)
+            if m.bias is not None:
+                init.constant_(m.bias, 0)
+
+
+_, term_width = os.popen('stty size', 'r').read().split()
+term_width = int(term_width)
+
+TOTAL_BAR_LENGTH = 65.
+last_time = time.time()
+begin_time = last_time
+def progress_bar(current, total, msg=None):
+    global last_time, begin_time
+    if current == 0:
+        begin_time = time.time()  # Reset for new bar.
+
+    cur_len = int(TOTAL_BAR_LENGTH*current/total)
+    rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1
+
+    sys.stdout.write(' [')
+    for i in range(cur_len):
+        sys.stdout.write('=')
+    sys.stdout.write('>')
+    for i in range(rest_len):
+        sys.stdout.write('.')
+    sys.stdout.write(']')
+
+    cur_time = time.time()
+    step_time = cur_time - last_time
+    last_time = cur_time
+    tot_time = cur_time - begin_time
+
+    L = []
+    L.append('  Step: %s' % format_time(step_time))
+    L.append(' | Tot: %s' % format_time(tot_time))
+    if msg:
+        L.append(' | ' + msg)
+
+    msg = ''.join(L)
+    sys.stdout.write(msg)
+    for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
+        sys.stdout.write(' ')
+
+    # Go back to the center of the bar.
+    for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
+        sys.stdout.write('\b')
+    sys.stdout.write(' %d/%d ' % (current+1, total))
+
+    if current < total-1:
+        sys.stdout.write('\r')
+    else:
+        sys.stdout.write('\n')
+    sys.stdout.flush()
+
+def format_time(seconds):
+    days = int(seconds / 3600/24)
+    seconds = seconds - days*3600*24
+    hours = int(seconds / 3600)
+    seconds = seconds - hours*3600
+    minutes = int(seconds / 60)
+    seconds = seconds - minutes*60
+    secondsf = int(seconds)
+    seconds = seconds - secondsf
+    millis = int(seconds*1000)
+
+    f = ''
+    i = 1
+    if days > 0:
+        f += str(days) + 'D'
+        i += 1
+    if hours > 0 and i <= 2:
+        f += str(hours) + 'h'
+        i += 1
+    if minutes > 0 and i <= 2:
+        f += str(minutes) + 'm'
+        i += 1
+    if secondsf > 0 and i <= 2:
+        f += str(secondsf) + 's'
+        i += 1
+    if millis > 0 and i <= 2:
+        f += str(millis) + 'ms'
+        i += 1
+    if f == '':
+        f = '0ms'
+    return f
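Typical use of these helpers (a hypothetical snippet, not part of the patch; progress_bar needs a real terminal because the module reads the width from stty at import time). The printed statistics should come out near the normalization constants commonly quoted for CIFAR-10:

    import torchvision
    import torchvision.transforms as transforms
    from utils import get_mean_and_std, progress_bar

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True,
                                            transform=transforms.ToTensor())
    print(get_mean_and_std(trainset))  # roughly (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)

    for batch_idx in range(100):
        progress_bar(batch_idx, 100, 'Loss: %.3f' % 1.234)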