smart_augmentation/higher/smart_aug/datasets.py

81 lines
3.8 KiB
Python
Raw Normal View History

2020-01-24 14:32:37 -05:00
""" Dataset definition.
MNIST / CIFAR10
"""
2019-11-13 11:45:05 -05:00
import torch
from torch.utils.data import SubsetRandomSampler
2020-02-03 12:55:36 -05:00
from torch.utils.data.dataset import ConcatDataset
2019-11-13 11:45:05 -05:00
import torchvision
2020-01-24 15:10:08 -05:00
#Train/Validation batch size.
2019-11-13 11:45:05 -05:00
BATCH_SIZE = 300
2020-01-24 15:10:08 -05:00
#Test batch size.
TEST_SIZE = BATCH_SIZE
#TEST_SIZE = 10000 #slightly faster / higher memory consumption!
2019-11-13 11:45:05 -05:00
2020-01-24 15:10:08 -05:00
#Whether to download data.
2020-02-03 11:21:54 -05:00
download_data=False
2020-01-24 15:10:08 -05:00
#Number of workers passed to the DataLoaders (num_workers argument).
num_workers=2 #4
2020-01-24 15:10:08 -05:00
#Pin GPU memory
2020-01-20 11:05:40 -05:00
pin_memory=False #True: more GPU memory / slower
2019-11-13 11:45:05 -05:00
#WARNING: Dataug (Kornia) expects images in the range [0, 1].
#transform_train = torchvision.transforms.Compose([
# torchvision.transforms.RandomHorizontalFlip(),
# torchvision.transforms.ToTensor(),
# torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), #CIFAR10
2019-11-13 11:45:05 -05:00
#])
# Transform applied to the test set: only the conversion to tensors.
# The CIFAR10 normalization is kept disabled (note that Dataug (Kornia)
# expects images in the range [0, 1]):
#   torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
transform = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor()]
)
2020-01-31 10:34:44 -05:00
# Transform applied to the training set. The random flips below are
# currently disabled, so only the conversion to tensors is performed:
#   transforms.RandomHorizontalFlip()
#   transforms.RandomVerticalFlip()
transform_train = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor()]
)
2020-01-31 10:36:50 -05:00
#from RandAugment import RandAugment
# Add RandAugment with N, M(hyperparameter)
2020-01-31 10:34:44 -05:00
#transform_train.transforms.insert(0, RandAugment(n=2, m=30))
2019-12-04 12:28:32 -05:00
### Classic Dataset ###
2020-02-03 12:55:36 -05:00
# Root directory handed to the torchvision dataset constructors.
dataroot="../data"
2020-01-31 10:34:44 -05:00
#MNIST
2020-02-03 12:55:36 -05:00
#data_train = torchvision.datasets.MNIST(dataroot, train=True, download=True, transform=transform_train)
#data_val = torchvision.datasets.MNIST(dataroot, train=True, download=True, transform=transform)
#data_test = torchvision.datasets.MNIST(dataroot, train=False, download=True, transform=transform)
2020-01-31 10:34:44 -05:00
#CIFAR
2020-02-03 12:55:36 -05:00
# Active dataset: CIFAR10. The training split uses transform_train, the
# test split uses transform.
data_train = torchvision.datasets.CIFAR10(
    root=dataroot,
    train=True,
    transform=transform_train,
    download=download_data,
)
#data_val = torchvision.datasets.CIFAR10(dataroot, train=True, download=download_data, transform=transform)
data_test = torchvision.datasets.CIFAR10(
    root=dataroot,
    train=False,
    transform=transform,
    download=download_data,
)

# CIFAR100 alternative (disabled).
#data_train = torchvision.datasets.CIFAR100(dataroot, train=True, download=download_data, transform=transform_train)
#data_val = torchvision.datasets.CIFAR100(dataroot, train=True, download=download_data, transform=transform)
#data_test = torchvision.datasets.CIFAR100(dataroot, train=False, download=download_data, transform=transform)

# SVHN alternative (disabled): 'train' and 'extra' splits concatenated for training.
#trainset = torchvision.datasets.SVHN(root=dataroot, split='train', download=download_data, transform=transform_train)
#extraset = torchvision.datasets.SVHN(root=dataroot, split='extra', download=download_data, transform=transform_train)
#data_train = ConcatDataset([trainset, extraset])
#data_test = torchvision.datasets.SVHN(dataroot, split='test', download=download_data, transform=transform)

# ImageNet alternative (disabled).
# Requires SciPy.
# Possible issue? See: https://github.com/ildoonet/pytorch-randaugment/blob/48b8f509c4bbda93bbe733d98b3fd052b6e4c8ae/RandAugment/imagenet.py#L28
# NOTE(review): the lines below use `os` and `transform_test`, neither of which
# is imported/defined in the visible part of this file - confirm before enabling.
#data_train = torchvision.datasets.ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), split='train', transform=transform_train)
#data_test = torchvision.datasets.ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), split='val', transform=transform_test)
2019-12-04 12:28:32 -05:00
2019-11-20 16:06:27 -05:00
# Split data_train by index: the first half feeds the training loader,
# the second half feeds the validation loader.
train_subset_indices = range(len(data_train) // 2)
val_subset_indices = range(len(data_train) // 2, len(data_train))
# Alternative (disabled): smaller subsets of 10 batches each.
#train_subset_indices=range(BATCH_SIZE*10)
#val_subset_indices=range(BATCH_SIZE*10, BATCH_SIZE*20)
2019-12-04 12:28:32 -05:00
2020-01-24 11:50:30 -05:00
# Training and validation loaders both read from data_train; each one is
# restricted to its own index range through a SubsetRandomSampler.
# shuffle is left False on purpose: the sampler already decides the order.
# NOTE(review): since dl_val draws from data_train, validation samples go
# through transform_train as well - keep that in mind if augmentations are
# enabled in transform_train.
dl_train = torch.utils.data.DataLoader(
    data_train,
    batch_size=BATCH_SIZE,
    shuffle=False,
    sampler=SubsetRandomSampler(train_subset_indices),
    num_workers=num_workers,
    pin_memory=pin_memory,
)
dl_val = torch.utils.data.DataLoader(
    data_train,
    batch_size=BATCH_SIZE,
    shuffle=False,
    sampler=SubsetRandomSampler(val_subset_indices),
    num_workers=num_workers,
    pin_memory=pin_memory,
)
# Test loader: sequential pass over the whole test set.
dl_test = torch.utils.data.DataLoader(
    data_test,
    batch_size=TEST_SIZE,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=pin_memory,
)