From 6277e268c1274594aff3171e205702b5d0a9e62e Mon Sep 17 00:00:00 2001
From: "Harle, Antoine (Contracteur)" <Antoine.Harle@Teledyne.com>
Date: Wed, 5 Feb 2020 12:24:20 -0500
Subject: [PATCH] RandAugment

---
 augmentations_randaugment.py  | 271 ++++++++++++++++++++++++++++++++++
 higher/smart_aug/benchmark.py | 149 +++++++++++++------
 higher/smart_aug/datasets.py  |  60 ++++----
 3 files changed, 407 insertions(+), 73 deletions(-)
 create mode 100644 augmentations_randaugment.py

diff --git a/augmentations_randaugment.py b/augmentations_randaugment.py
new file mode 100644
index 0000000..b491942
--- /dev/null
+++ b/augmentations_randaugment.py
@@ -0,0 +1,271 @@
+# Code in this file is adapted from rpmcruz/autoaugment
+# https://github.com/rpmcruz/autoaugment/blob/master/transformations.py
+import random
+
+import PIL, PIL.ImageOps, PIL.ImageEnhance, PIL.ImageDraw
+import numpy as np
+import torch
+from PIL import Image
+
+def ShearX(img, v):  # horizontal shear; requires -0.3 <= v <= 0.3, sign is then randomized
+    assert -0.3 <= v <= 0.3
+    shear = -v if random.random() > 0.5 else v
+    affine = (1, shear, 0, 0, 1, 0)
+    return img.transform(img.size, PIL.Image.AFFINE, affine)
+
+
+def ShearY(img, v):  # vertical shear; requires -0.3 <= v <= 0.3, sign is then randomized
+    assert -0.3 <= v <= 0.3
+    shear = -v if random.random() > 0.5 else v
+    affine = (1, 0, 0, shear, 1, 0)
+    return img.transform(img.size, PIL.Image.AFFINE, affine)
+
+
+def TranslateX(img, v):  # v: horizontal shift as a fraction of the image width, |v| <= 0.45
+    assert -0.45 <= v <= 0.45
+    fraction = -v if random.random() > 0.5 else v
+    shift = fraction * img.size[0]
+    affine = (1, 0, shift, 0, 1, 0)
+    return img.transform(img.size, PIL.Image.AFFINE, affine)
+
+
+def TranslateXabs(img, v):  # v: non-negative absolute shift (not scaled by image size); sign randomized
+    assert 0 <= v
+    shift = -v if random.random() > 0.5 else v
+    affine = (1, 0, shift, 0, 1, 0)
+    return img.transform(img.size, PIL.Image.AFFINE, affine)
+
+
+def TranslateY(img, v):  # v: vertical shift as a fraction of the image height, |v| <= 0.45
+    assert -0.45 <= v <= 0.45
+    fraction = -v if random.random() > 0.5 else v
+    shift = fraction * img.size[1]
+    affine = (1, 0, 0, 0, 1, shift)
+    return img.transform(img.size, PIL.Image.AFFINE, affine)
+
+
+def TranslateYabs(img, v):  # v: non-negative absolute shift (not scaled by image size); sign randomized
+    assert 0 <= v
+    shift = -v if random.random() > 0.5 else v
+    affine = (1, 0, 0, 0, 1, shift)
+    return img.transform(img.size, PIL.Image.AFFINE, affine)
+
+
+def Rotate(img, v):  # v: rotation angle passed to img.rotate; requires -30 <= v <= 30, sign randomized
+    assert -30 <= v <= 30
+    flip_sign = random.random() > 0.5
+    angle = -v if flip_sign else v
+    return img.rotate(angle)
+
+
+def AutoContrast(img, _):  # second argument (magnitude) is ignored
+    return PIL.ImageOps.autocontrast(img)
+
+
+def Invert(img, _):  # second argument (magnitude) is ignored
+    return PIL.ImageOps.invert(img)
+
+
+def Equalize(img, _):  # second argument (magnitude) is ignored
+    return PIL.ImageOps.equalize(img)
+
+
+def Flip(img, _):  # not from the paper; mirrors the image, second argument is ignored
+    return PIL.ImageOps.mirror(img)
+
+def FlipLR(img, v):  # left-right flip; v is accepted for a uniform op signature but unused
+    return img.transpose(Image.FLIP_LEFT_RIGHT)
+
+def FlipUD(img, v):  # top-bottom flip; v is accepted for a uniform op signature but unused
+    return img.transpose(Image.FLIP_TOP_BOTTOM)
+
+def Solarize(img, v):  # v: solarize threshold, must lie in [0, 256]
+    assert 0 <= v <= 256
+    return PIL.ImageOps.solarize(img, v)
+
+
+def SolarizeAdd(img, addition=0, threshold=128):  # add `addition` to every pixel, clip, then solarize
+    img_np = np.array(img).astype(int)  # builtin int: the `np.int` alias was removed in NumPy 1.24
+    img_np = img_np + addition  # done before the uint8 cast so the sum is clipped rather than wrapped
+    img_np = np.clip(img_np, 0, 255)
+    img_np = img_np.astype(np.uint8)
+    img = Image.fromarray(img_np)
+    return PIL.ImageOps.solarize(img, threshold)
+
+
+def Posterize(img, v):  # v: bit count handed to posterize (the op list below uses 4..8)
+    # Truncate to an integer and never go below one bit.
+    bits = max(1, int(v))
+    return PIL.ImageOps.posterize(img, bits)
+
+
+def Contrast(img, v):  # v: enhancement factor, must lie in [0.1, 1.9]
+    assert 0.1 <= v <= 1.9
+    return PIL.ImageEnhance.Contrast(img).enhance(v)
+
+
+def Color(img, v):  # v: enhancement factor, must lie in [0.1, 1.9]
+    assert 0.1 <= v <= 1.9
+    return PIL.ImageEnhance.Color(img).enhance(v)
+
+
+def Brightness(img, v):  # v: enhancement factor, must lie in [0.1, 1.9]
+    assert 0.1 <= v <= 1.9
+    return PIL.ImageEnhance.Brightness(img).enhance(v)
+
+
+def Sharpness(img, v):  # v: enhancement factor, must lie in [0.1, 1.9]
+    assert 0.1 <= v <= 1.9
+    return PIL.ImageEnhance.Sharpness(img).enhance(v)
+
+
+def Cutout(img, v):  # v: side of the erased square as a fraction of the image width, in [0, 0.2]
+    assert 0.0 <= v <= 0.2
+    if v > 0.:
+        # Convert the fraction into an absolute size before delegating.
+        side = v * img.size[0]
+        return CutoutAbs(img, side)
+    return img
+
+
+def CutoutAbs(img, v):  # v: side length of the erased square, in absolute image coordinates
+    # Returns a copy of `img` with one randomly placed square (clipped to the image) painted over.
+    if v < 0:
+        return img
+    w, h = img.size
+    x0 = np.random.uniform(0, w)  # explicit bounds: uniform(w) would set low=w and keep high=1.0
+    y0 = np.random.uniform(0, h)
+
+    x0 = int(max(0, x0 - v / 2.))
+    y0 = int(max(0, y0 - v / 2.))
+    x1 = min(w, x0 + v)
+    y1 = min(h, y0 + v)
+
+    xy = (x0, y0, x1, y1)
+    color = (125, 123, 114)
+    # 3-tuple fill colour; presumably the input is an RGB image -- confirm before using other modes
+    img = img.copy()
+    PIL.ImageDraw.Draw(img).rectangle(xy, color)
+    return img
+
+
+def SamplePairing(imgs):  # builds op(img, v) that blends img with a random entry of `imgs`
+    def f(img1, v):
+        # v is forwarded as the blend alpha; the partner image is drawn uniformly from `imgs`.
+        partner = PIL.Image.fromarray(imgs[np.random.choice(len(imgs))])
+        return PIL.Image.blend(img1, partner, v)
+
+    return f
+
+
+def Identity(img, v):  # no-op: returns the input object itself, v is unused
+    return img
+
+
+def augment_list():  # 14 active operations, each as (op, min magnitude, max magnitude)
+    # https://github.com/google-research/uda/blob/master/image/randaugment/policies.py#L57
+    l = [
+        (Identity, 0., 1.0),
+        (FlipUD, 0., 1.0),
+        (FlipLR, 0., 1.0),
+        (Rotate, 0, 30),  # 4
+        (TranslateX, 0., 0.33),  # 2
+        (TranslateY, 0., 0.33),  # 3
+        (ShearX, 0., 0.3),  # 0
+        (ShearY, 0., 0.3),  # 1
+        #(AutoContrast, 0, 1),  # 5
+        #(Invert, 0, 1),  # 6
+        #(Equalize, 0, 1),  # 7
+        (Contrast, 0.1, 1.9),  # 10
+        (Color, 0.1, 1.9),  # 11
+        (Brightness, 0.1, 1.9),  # 12
+        (Sharpness, 0.1, 1.9),  # 13
+        (Posterize, 4, 8),  # 9
+        (Solarize, 1, 256),  # 8
+        
+        # (Cutout, 0, 0.2),  # 14
+        # (SamplePairing(imgs), 0, 0.4),  # 15
+    ]
+
+    # https://github.com/tensorflow/tpu/blob/8462d083dd89489a79e3200bcc8d4063bf362186/models/official/efficientnet/autoaugment.py#L505
+    #l = [
+    #    (AutoContrast, 0, 1),
+    #    (Equalize, 0, 1),
+    #    (Invert, 0, 1),
+    #    (Rotate, 0, 30),
+    #    (Posterize, 0, 4),
+    #    (Solarize, 0, 256),
+    #    (SolarizeAdd, 0, 110),
+    #    (Color, 0.1, 1.9),
+    #    (Contrast, 0.1, 1.9),
+    #    (Brightness, 0.1, 1.9),
+    #    (Sharpness, 0.1, 1.9),
+    #    (ShearX, 0., 0.3),
+    #    (ShearY, 0., 0.3),
+    #    (CutoutAbs, 0, 40),
+    #    (TranslateXabs, 0., 100),
+    #    (TranslateYabs, 0., 100),
+    #]
+
+    return l
+
+
+class Lighting(object):
+    """Lighting noise (AlexNet-style, PCA-based); returns a new tensor, the input is not modified."""
+
+    def __init__(self, alphastd, eigval, eigvec):
+        self.alphastd = alphastd  # std of the random coefficients; 0 disables the noise
+        self.eigval = torch.Tensor(eigval)  # viewed as (1, 3) below
+        self.eigvec = torch.Tensor(eigvec)  # multiplied element-wise with (3, 3) expansions below
+
+    def __call__(self, img):
+        if self.alphastd == 0:
+            return img  # noise disabled: the input object is returned as is
+
+        alpha = img.new().resize_(3).normal_(0, self.alphastd)  # 3 draws from N(0, alphastd)
+        rgb = self.eigvec.type_as(img).clone() \
+            .mul(alpha.view(1, 3).expand(3, 3)) \
+            .mul(self.eigval.view(1, 3).expand(3, 3)) \
+            .sum(1).squeeze()
+
+        return img.add(rgb.view(3, 1, 1).expand_as(img))  # out-of-place add of one offset per channel
+
+
+class CutoutDefault(object):
+    """Zero a randomly centred square patch of a 3-D tensor, in place.
+    Reference : https://github.com/quark0/darts/blob/master/cnn/utils.py
+    """
+    def __init__(self, length):
+        self.length = length
+
+    def __call__(self, img):
+        height, width = img.size(1), img.size(2)
+        half = self.length // 2
+        cy = np.random.randint(height)  # row is drawn first, then column
+        cx = np.random.randint(width)
+
+        # The window is clipped to the image, so it shrinks near the borders.
+        top, bottom = np.clip([cy - half, cy + half], 0, height)
+        left, right = np.clip([cx - half, cx + half], 0, width)
+
+        keep = np.ones((height, width), np.float32)
+        keep[top:bottom, left:right] = 0.
+        # Broadcast the 2-D mask over the leading dimension and apply it to the input itself.
+        keep = torch.from_numpy(keep).expand_as(img)
+        img *= keep
+        return img
+
+PARAMETER_MAX = 1
+class RandAugment:  # applies n randomly drawn ops from augment_list(), all at magnitude m
+    def __init__(self, n, m):
+        self.augment_list = augment_list()
+        self.n = n  # number of operations applied per call
+        self.m = m  # shared magnitude, expected in [0, PARAMETER_MAX]
+
+    def __call__(self, img):
+        # Sampling is with replacement, so the same operation can be picked more than once.
+        for transform, low, high in random.choices(self.augment_list, k=self.n):
+            level = float(self.m) / PARAMETER_MAX * float(high - low) + low
+            img = transform(img, level)
+
+        return img
diff --git a/higher/smart_aug/benchmark.py b/higher/smart_aug/benchmark.py
index bc19929..f2d453c 100644
--- a/higher/smart_aug/benchmark.py
+++ b/higher/smart_aug/benchmark.py
@@ -1,3 +1,6 @@
+""" Script to run a series of experiments.
+
+"""
 from dataug import *
 #from utils import *
 from train_utils import *
@@ -13,14 +16,16 @@ optim_param={
     },
     'Inner':{
         'optim': 'SGD',
-        'lr':1e-1, #1e-2 #1e-1 for ResNet
+        'lr':1e-2, #1e-2 #1e-1 for ResNet
         'momentum':0.9, #0.9
     }
 }
 
 res_folder="../res/benchmark/CIFAR10/"
-epochs= 150
+#res_folder="../res/HPsearch/"
+epochs= 200
 dataug_epoch_start=0
+nb_run= 3
 
 # Use available TF (see transformations.py)
 tf_names = [
@@ -80,60 +85,107 @@ if __name__ == "__main__":
     '''
     for model_type in model_list.keys():
             for model_name in model_list[model_type]:
-                model = getattr(model_type, model_name)(pretrained=False)
+                for run in range(nb_run):
 
-                t0 = time.process_time()
+                    torch.cuda.reset_max_memory_cached() #reset_peak_stats
+                    t0 = time.perf_counter()
 
-                model = Higher_model(model) #run_dist_dataugV3
-                if n_inner_iter!=0:
-                    aug_model = Augmented_model(
-                        Data_augV5(TF_dict=tf_dict, 
-                            N_TF=n_tf, 
-                            mix_dist=dist, 
-                            fixed_prob=p_setup, 
-                            fixed_mag=m_setup[0], 
-                            shared_mag=m_setup[1]), 
-                        model).to(device)
-                else:
-                    aug_model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=n_tf), model).to(device)
+                    model = getattr(model_type, model_name)(pretrained=False)
 
-                print("{} on {} for {} epochs - {} inner_it".format(str(aug_model), device_name, epochs, n_inner_iter))
-                log= run_dist_dataugV3(model=aug_model,
-                     epochs=epochs, 
-                     inner_it=n_inner_iter, 
-                     dataug_epoch_start=dataug_epoch_start, 
-                     opt_param=optim_param,
-                     print_freq=epochs/4, 
-                     unsup_loss=1, 
-                     hp_opt=False,
-                     save_sample_freq=None)
+                    model = Higher_model(model, model_name) #run_dist_dataugV3
+                    if n_inner_iter!=0:
+                        aug_model = Augmented_model(
+                            Data_augV5(TF_dict=tf_dict, 
+                                N_TF=n_tf, 
+                                mix_dist=dist, 
+                                fixed_prob=p_setup, 
+                                fixed_mag=m_setup[0], 
+                                shared_mag=m_setup[1]), 
+                            model).to(device)
+                    else:
+                        aug_model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=n_tf), model).to(device)
 
-                exec_time=time.process_time() - t0
-                ####
-                print('-'*9)
-                times = [x["time"] for x in log]
-                out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times), exec_time), 'Optimizer': optim_param, "Device": device_name, "Param_names": aug_model.TF_names(), "Log": log}
-                print(str(aug_model),": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
-                filename = "{}-{} epochs (dataug:{})- {} in_it-{}".format(str(aug_model),epochs,dataug_epoch_start,n_inner_iter, run)
-                with open("../res/log/%s.json" % filename, "w+") as f:
-                    try:
-                        json.dump(out, f, indent=True)
-                        print('Log :\"',f.name, '\" saved !')
-                    except:
-                        print("Failed to save logs :",f.name)
+                    print("{} on {} for {} epochs - {} inner_it".format(str(aug_model), device_name, epochs, n_inner_iter))
+                    log= run_dist_dataugV3(model=aug_model,
+                         epochs=epochs, 
+                         inner_it=n_inner_iter, 
+                         dataug_epoch_start=dataug_epoch_start, 
+                         opt_param=optim_param,
+                         print_freq=epochs/4, 
+                         unsup_loss=1, 
+                         hp_opt=False,
+                         save_sample_freq=None)
 
-                print('Execution Time : %.00f '%(exec_time))
-                print('-'*9)
-    
+                    exec_time=time.perf_counter() - t0
+                    max_cached = torch.cuda.max_memory_cached()/(1024.0 * 1024.0) #torch.cuda.max_memory_reserved() #MB
+                    ####
+                    print('-'*9)
+                    times = [x["time"] for x in log]
+                    out = {"Accuracy": max([x["acc"] for x in log]), 
+                        "Time": (np.mean(times),np.std(times), exec_time), 
+                        'Optimizer': optim_param, 
+                        "Device": device_name, 
+                        "Memory": max_cached, 
+                        "Param_names": aug_model.TF_names(), 
+                        "Log": log}
+                    print(str(aug_model),": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
+                    filename = "{}-{} epochs (dataug:{})- {} in_it-{}".format(str(aug_model),epochs,dataug_epoch_start,n_inner_iter, run)
+                    with open(res_folder+"log/%s.json" % filename, "w+") as f:
+                        try:
+                            json.dump(out, f, indent=True)
+                            print('Log :\"',f.name, '\" saved !')
+                        except:
+                            print("Failed to save logs :",f.name)
+
+                    print('Execution Time : %.00f '%(exec_time))
+                    print('-'*9)
     '''
+    ### Benchmark - RandAugment ###
+    for model_type in model_list.keys():
+            for model_name in model_list[model_type]:
+                for run in range(nb_run):
+                    torch.cuda.reset_max_memory_cached() #reset_peak_stats
+                    t0 = time.perf_counter()
+
+                    model = getattr(model_type, model_name)(pretrained=False).to(device)
+
+                    print("RandAugment(N{}-M{})-{} on {} for {} epochs".format(rand_aug['N'],rand_aug['M'],model_name, device_name, epochs))
+                    log= train_classic(model=model, opt_param=optim_param, epochs=epochs, print_freq=epochs/4)
+
+                    exec_time=time.perf_counter() - t0
+                    max_cached = torch.cuda.max_memory_cached()/(1024.0 * 1024.0) #torch.cuda.max_memory_reserved() #MB
+                    ####
+                    print('-'*9)
+                    times = [x["time"] for x in log]
+                    out = {"Accuracy": max([x["acc"] for x in log]), 
+                        "Time": (np.mean(times),np.std(times), exec_time), 
+                        'Optimizer': optim_param, 
+                        "Device": device_name, 
+                        "Memory": max_cached,
+                        "Rand_Aug": rand_aug, 
+                        "Log": log}
+                    print("RandAugment-",model_name,": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
+                    filename = "RandAugment(N{}-M{})-{}-{} epochs -{}".format(rand_aug['N'],rand_aug['M'],model_name,epochs, run)
+                    with open(res_folder+"log/%s.json" % filename, "w+") as f:
+                        try:
+                            json.dump(out, f, indent=True)
+                            print('Log :\"',f.name, '\" saved !')
+                        except:
+                            print("Failed to save logs :",f.name)
+
+                    #plot_resV2(log, fig_name=res_folder+filename)
+
+                    print('Execution Time : %.00f '%(exec_time))
+                    print('-'*9)
 
     ### HP Search ###
+    '''
+    from LeNet import *
     inner_its = [1]
     dist_mix = [0.0, 0.5, 0.8, 1.0]
-    N_seq_TF= [2, 3, 4]
+    N_seq_TF= [3, 2, 4]
     mag_setup = [(True,True), (False, False)] #(FxSh, Independant)
     #prob_setup = [True, False]
-    nb_run= 3
     
     try:
         os.mkdir(res_folder)
@@ -150,9 +202,10 @@ if __name__ == "__main__":
                         p_setup=False
                         for run in range(nb_run):
 
-                            t0 = time.process_time()
+                            t0 = time.perf_counter()
 
-                            model = getattr(models.resnet, 'resnet18')(pretrained=False)
+                            #model = getattr(models.resnet, 'resnet18')(pretrained=False)
+                            model = LeNet(3,10)
                             model = Higher_model(model) #run_dist_dataugV3
                             aug_model = Augmented_model(Data_augV5(TF_dict=tf_dict, N_TF=n_tf, mix_dist=dist, fixed_prob=p_setup, fixed_mag=m_setup[0], shared_mag=m_setup[1]), model).to(device)
                             #aug_model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)
@@ -168,7 +221,7 @@ if __name__ == "__main__":
                                  hp_opt=False,
                                  save_sample_freq=None)
 
-                            exec_time=time.process_time() - t0
+                            exec_time=time.perf_counter() - t0
                             ####
                             print('-'*9)
                             times = [x["time"] for x in log]
@@ -184,4 +237,4 @@ if __name__ == "__main__":
 
                             print('Execution Time : %.00f '%(exec_time))
                             print('-'*9)
-                        #'''
+    '''
diff --git a/higher/smart_aug/datasets.py b/higher/smart_aug/datasets.py
index 1ec3241..de81d5d 100755
--- a/higher/smart_aug/datasets.py
+++ b/higher/smart_aug/datasets.py
@@ -1,6 +1,6 @@
 """ Dataset definition.
 
-    MNIST / CIFAR10
+    MNIST / CIFAR10 / CIFAR100 / SVHN / ImageNet
 """
 import torch
 from torch.utils.data.dataset import ConcatDataset
@@ -37,9 +37,16 @@ transform_train = torchvision.transforms.Compose([
     #transforms.RandomVerticalFlip(),
     torchvision.transforms.ToTensor(),
 ])
-#from RandAugment import RandAugment
+
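+## RandAugment ## NOTE(review): this patch adds augmentations_randaugment.py, but the import below targets a module named RandAugment -- confirm it resolves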
+from RandAugment import RandAugment
 # Add RandAugment with N, M(hyperparameter)
-#transform_train.transforms.insert(0, RandAugment(n=2, m=30))
+rand_aug={'N': 2, 'M': 1}
+#rand_aug={'N': 2, 'M': 9./30} #RN-ImageNet
+#rand_aug={'N': 3, 'M': 5./30} #WRN-CIFAR10
+#rand_aug={'N': 2, 'M': 14./30} #WRN-CIFAR100
+#rand_aug={'N': 3, 'M': 7./30} #WRN-SVHN
+transform_train.transforms.insert(0, RandAugment(n=rand_aug['N'], m=rand_aug['M']))
 
 ### Classic Dataset ###
 
@@ -50,7 +57,7 @@ transform_train = torchvision.transforms.Compose([
 
 #CIFAR
 data_train = torchvision.datasets.CIFAR10(dataroot, train=True, download=download_data, transform=transform_train)
-#data_val = torchvision.datasets.CIFAR10(dataroot, train=True, download=download_data, transform=transform)
+data_val = torchvision.datasets.CIFAR10(dataroot, train=True, download=download_data, transform=transform)
 data_test = torchvision.datasets.CIFAR10(dataroot, train=False, download=download_data, transform=transform)
 
 #data_train = torchvision.datasets.CIFAR100(dataroot, train=True, download=download_data, transform=transform_train)
@@ -72,32 +79,18 @@ data_test = torchvision.datasets.CIFAR10(dataroot, train=False, download=downloa
 
 #Validation set size [0, 1]
 valid_size=0.1
-#train_subset_indices=range(int(len(data_train)*(1-valid_size)))
-#val_subset_indices=range(int(len(data_train)*(1-valid_size)),len(data_train))
+train_subset_indices=range(int(len(data_train)*(1-valid_size)))
+val_subset_indices=range(int(len(data_train)*(1-valid_size)),len(data_train))
 #train_subset_indices=range(BATCH_SIZE*10)
 #val_subset_indices=range(BATCH_SIZE*10, BATCH_SIZE*20)
 
-#from torch.utils.data import SubsetRandomSampler
-#dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices), num_workers=num_workers, pin_memory=pin_memory)
-#dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices), num_workers=num_workers, pin_memory=pin_memory)
+from torch.utils.data import SubsetRandomSampler
+dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices), num_workers=num_workers, pin_memory=pin_memory)
+dl_val = torch.utils.data.DataLoader(data_val, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices), num_workers=num_workers, pin_memory=pin_memory)
 dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)
 
 #Cross Validation
 '''
-from skorch.dataset import CVSplit
-import numpy as np
-cvs = CVSplit(cv=valid_size, stratified=True) #Stratified =True for unbalanced dataset #ShuffleSplit
-
-def next_CVSplit():
-
-    train_subset, val_subset = cvs(data_train, y=np.array(data_train.targets))
-    dl_train = torch.utils.data.DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
-    dl_val = torch.utils.data.DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
-
-    return dl_train, dl_val
-
-dl_train, dl_val = next_CVSplit()
-'''
 import numpy as np
 from sklearn.model_selection import ShuffleSplit
 from sklearn.model_selection import StratifiedShuffleSplit
@@ -134,7 +127,7 @@ class CVSplit(object):
         else:
             cv_cls = ShuffleSplit
 
-        self._cv= cv_cls(test_size=val_size, random_state=0)
+        self._cv= cv_cls(test_size=val_size, random_state=0) #Random state w/ fixed seed
 
     def next_split(self):
         """ Get next cross-validation split.
@@ -157,4 +150,21 @@ class CVSplit(object):
         return dl_train, dl_val
 
 cvs = CVSplit(data_train, val_size=valid_size)
-dl_train, dl_val = cvs.next_split()
\ No newline at end of file
+dl_train, dl_val = cvs.next_split()
+'''
+
+'''
+from skorch.dataset import CVSplit
+import numpy as np
+cvs = CVSplit(cv=valid_size, stratified=True) #Stratified =True for unbalanced dataset #ShuffleSplit
+
+def next_CVSplit():
+
+    train_subset, val_subset = cvs(data_train, y=np.array(data_train.targets))
+    dl_train = torch.utils.data.DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
+    dl_val = torch.utils.data.DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
+
+    return dl_train, dl_val
+
+dl_train, dl_val = next_CVSplit()
+'''
\ No newline at end of file