mirror of
https://github.com/AntoineHX/smart_augmentation.git
synced 2025-05-04 12:10:45 +02:00
Comment Confmat + Cross-Val (sans Skorch) + minor improv
This commit is contained in:
parent
385bc9977c
commit
be8491268a
4 changed files with 133 additions and 33 deletions
|
@ -3,7 +3,6 @@
|
|||
MNIST / CIFAR10
|
||||
"""
|
||||
import torch
|
||||
from torch.utils.data import SubsetRandomSampler
|
||||
from torch.utils.data.dataset import ConcatDataset
|
||||
import torchvision
|
||||
|
||||
|
@ -72,26 +71,90 @@ data_test = torchvision.datasets.CIFAR10(dataroot, train=False, download=downloa
|
|||
|
||||
|
||||
#Validation set size [0, 1]
|
||||
#valid_size=0.1
|
||||
valid_size=0.1
|
||||
#train_subset_indices=range(int(len(data_train)*(1-valid_size)))
|
||||
#val_subset_indices=range(int(len(data_train)*(1-valid_size)),len(data_train))
|
||||
#train_subset_indices=range(BATCH_SIZE*10)
|
||||
#val_subset_indices=range(BATCH_SIZE*10, BATCH_SIZE*20)
|
||||
|
||||
#from torch.utils.data import SubsetRandomSampler
|
||||
#dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices), num_workers=num_workers, pin_memory=pin_memory)
|
||||
#dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices), num_workers=num_workers, pin_memory=pin_memory)
|
||||
# Test loader: iterates data_test in its stored order (shuffle=False) in batches of TEST_SIZE.
dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)
|
||||
|
||||
#Cross Validation
|
||||
'''
|
||||
from skorch.dataset import CVSplit
|
||||
cvs = CVSplit(cv=5)
|
||||
import numpy as np
|
||||
cvs = CVSplit(cv=valid_size, stratified=True) #Stratified =True for unbalanced dataset #ShuffleSplit
|
||||
|
||||
def next_CVSplit():
|
||||
|
||||
train_subset, val_subset = cvs(data_train)
|
||||
train_subset, val_subset = cvs(data_train, y=np.array(data_train.targets))
|
||||
dl_train = torch.utils.data.DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
|
||||
dl_val = torch.utils.data.DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
|
||||
|
||||
return dl_train, dl_val
|
||||
|
||||
dl_train, dl_val = next_CVSplit()
|
||||
dl_train, dl_val = next_CVSplit()
|
||||
'''
|
||||
import numpy as np
|
||||
from sklearn.model_selection import ShuffleSplit
|
||||
from sklearn.model_selection import StratifiedShuffleSplit
|
||||
class CVSplit(object):
    """Perform random train/validation splits on a dataset.

    Inspired from : https://skorch.readthedocs.io/en/latest/user/dataset.html

    Attributes:
        _stratified (bool): Whether the split is stratified. Recommended to be
            True for unbalanced datasets.
        _val_size (float, int): If float, should be between 0.0 and 1.0 and
            represents the proportion of the dataset to include in the
            validation split. If int, represents the absolute number of
            validation samples.
        _data (Dataset): Dataset to split.
        _targets (np.array): Targets of the dataset. Only set when
            _stratified is True.
        _cv (BaseShuffleSplit): Scikit-learn object used to draw the split
            indices.
    """

    def __init__(self, data, val_size=0.1, stratified=True):
        """Initialize CVSplit.

        Args:
            data (Dataset): Dataset to split. Must expose a ``targets``
                attribute when ``stratified`` is True.
            val_size (float, int): If float, should be between 0.0 and 1.0 and
                represents the proportion of the dataset to include in the
                validation split. If int, represents the absolute number of
                validation samples. (Default: 0.1)
            stratified (bool): Whether the split should be stratified.
                Recommended to be True for unbalanced datasets.
                (Default: True)

        Raises:
            ValueError: If ``stratified`` is True and ``data`` has no
                ``targets`` attribute.
        """
        self._stratified = stratified
        self._val_size = val_size
        self._data = data

        if self._stratified:
            cv_cls = StratifiedShuffleSplit
            # Read the labels from the dataset that was passed in, not from a
            # module-level dataset: the labels must line up with `data`.
            targets = getattr(data, "targets", None)
            if targets is None:
                raise ValueError(
                    "A stratified split requires `data` to expose a `targets` attribute."
                )
            self._targets = np.array(targets)
        else:
            cv_cls = ShuffleSplit

        # A seeded RandomState *instance* keeps runs reproducible while letting
        # each call to split() draw fresh indices. An int seed would replay the
        # exact same split on every call. One split is drawn per call.
        self._cv = cv_cls(
            n_splits=1,
            test_size=val_size,
            random_state=np.random.RandomState(0),
        )

    def next_split(self):
        """Get the next cross-validation split.

        Each call draws a new random train/validation partition of the
        dataset. The sequence of partitions is reproducible across runs.

        Returns:
            Train DataLoader, Validation DataLoader
        """
        split_args = (np.arange(len(self._data)),)
        if self._stratified:
            # The stratified splitter needs the labels as second argument.
            split_args = split_args + (self._targets,)

        idx_train, idx_valid = next(self._cv.split(*split_args))

        train_subset = torch.utils.data.Subset(self._data, idx_train)
        val_subset = torch.utils.data.Subset(self._data, idx_valid)

        # BATCH_SIZE, num_workers and pin_memory are module-level settings.
        dl_train = torch.utils.data.DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
        dl_val = torch.utils.data.DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)

        return dl_train, dl_val
|
||||
|
||||
# Splitter over data_train; val_size=valid_size sets the held-out validation share (stratified by default).
cvs = CVSplit(data_train, val_size=valid_size)
|
||||
# Train and validation DataLoaders built from one split of data_train.
dl_train, dl_val = cvs.next_split()
|
Loading…
Add table
Add a link
Reference in a new issue