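"""MNIST experiments with stacked hyperoptimizers.

Trains a small fully connected MNIST classifier with optimizers whose own
hyperparameters (e.g. SGD's alpha, Adam's alpha/beta1/beta2/log_eps) are
adjusted by gradient descent, as in the setup referenced at
https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/.
Each run writes a JSON log to log/<name>.json. The optimizer classes (SGD,
Adam, AdamBaydin, SGDPerParam) and MNIST_FullyConnected are assumed to be
provided by the repo-local hyperopt module imported below.
"""
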
import numpy as np
import json, math, time, os
import gc

from hyperopt import *

# Explicit imports for names used directly in this file (torch, torchvision,
# F); they may already be re-exported by the star import above.
import torch
import torchvision
import torch.nn.functional as F

BATCH_SIZE = 300

mnist_train = torchvision.datasets.MNIST(
    "./data", train=True, download=True, transform=torchvision.transforms.ToTensor()
)
mnist_test = torchvision.datasets.MNIST(
    "./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)

# shuffle=False: every run iterates over the batches in the same fixed order,
# so different optimizer configurations are compared on identical data streams.
dl_train = torch.utils.data.DataLoader(
    mnist_train, batch_size=BATCH_SIZE, shuffle=False
)
dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=10000, shuffle=False)


def test(model):
    # dl_test yields the entire 10000-image test set as a single batch;
    # accuracy is returned as a percentage.
    for i, (features_, labels_) in enumerate(dl_test):
        features, labels = torch.reshape(features_, (10000, 28 * 28)), labels_
        pred = model.forward(features)
        return pred.argmax(dim=1).eq(labels).sum().item() / 10000 * 100


def train(model, epochs=3, height=1):
    stats = []
    for epoch in range(epochs):
        for i, (features_, labels_) in enumerate(dl_train):
            t0 = time.process_time()
            model.begin()
            features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
            pred = model.forward(
                features
            )  # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
            loss = F.nll_loss(pred, labels)
            model.zero_grad()
            # create_graph=True keeps the backward graph so the stacked
            # hyperoptimizer can differentiate through the update step.
            loss.backward(create_graph=True)
            model.adjust()
            tf = time.process_time()
            data = {
                "time": tf - t0,
                "iter": epoch * len(dl_train) + i,
                "loss": loss.item(),
                "params": {
                    k: v.item()
                    for k, v in model.optimizer.parameters.items()
                    if "." not in k
                },
            }
            stats.append(data)
    return stats


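
# Illustrative note: each entry appended to `stats` above has the shape
#   {"time": <seconds>, "iter": <global step>, "loss": <float>,
#    "params": {"alpha": <float>, ...}}
# where "params" keeps only the top-level hyperparameters (keys containing "."
# are presumably nested per-parameter entries and are skipped).
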
def run(opt, name="out", usr={}, epochs=3, height=1):
    torch.manual_seed(0x42)
    model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
    print("Running...", str(model))
    model.initialize()
    log = train(model, epochs, height)
    acc = test(model)
    out = {"acc": acc, "log": log, "usr": usr}
    with open("log/%s.json" % name, "w+") as f:
        json.dump(out, f, indent=True)
    times = [x["time"] for x in log]
    # time.process_time() is measured in seconds, not milliseconds
    print("Times (s):", np.mean(times), "+/-", np.std(times))
    print("Final accuracy:", acc)
    return out


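
# Illustrative usage: a result written by run() can be inspected afterwards,
# e.g. for the "sgd" experiment below:
#
#   with open("log/sgd.json") as f:
#       result = json.load(f)
#   print(result["acc"], result["log"][-1]["params"])
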
def sgd_experiments():
    run(SGD(0.01), "sgd", epochs=1)
    # SGD whose step size alpha is itself adjusted by a second SGD, followed by
    # a plain SGD re-run at the learned alpha.
    out = run(SGD(0.01, optimizer=SGD(0.01)), "sgd+sgd", epochs=1)
    alpha = out["log"][-1]["params"]["alpha"]
    print(alpha)
    run(SGD(alpha), "sgd-final", epochs=1)


def adam_experiments():
    run(Adam(), "adam", epochs=1)
    print()
    mo = SGDPerParam(
        0.001, ["alpha", "beta1", "beta2", "log_eps"], optimizer=SGD(0.0001)
    )
    out = run(Adam(optimizer=mo), "adam+sgd", epochs=1)
    p = out["log"][-1]["params"]
    alpha = p["alpha"]
    beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
    beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
    log_eps = p["log_eps"]
    print(alpha, beta1, beta2, log_eps)
    print(mo)
    run(
        Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
        "adam+sgd-final",
        epochs=1,
    )
    print()
    out = run(Adam(optimizer=Adam()), "adam2", epochs=1)
    p = out["log"][-1]["params"]
    alpha = p["alpha"]
    beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
    beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
    log_eps = p["log_eps"]
    print(alpha, beta1, beta2, log_eps)
    run(
        Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
        "adam2-final",
        epochs=1,
    )
    print()
    mo = SGDPerParam(0.001, ["alpha"], optimizer=SGD(0.0001))
    out = run(AdamBaydin(optimizer=mo), "adambaydin+sgd", epochs=1)
    p = out["log"][-1]["params"]
    alpha = p["alpha"]
    print(alpha)
    print(mo)
    run(Adam(alpha=p["alpha"]), "adambaydin+sgd-final", epochs=1)
    print()
    out = run(AdamBaydin(optimizer=Adam()), "adambaydin2", epochs=1)
    p = out["log"][-1]["params"]
    alpha = p["alpha"]
    print(alpha)
    run(Adam(alpha=p["alpha"]), "adambaydin2-final", epochs=1)


def surface():
    run(SGD(10 ** -3, optimizer=SGD(10 ** -1)), "tst", epochs=1)
    for log_alpha in np.linspace(-3, 2, 10):
        run(SGD(10 ** log_alpha), "sgd@1e%+.2f" % log_alpha, epochs=1)


def make_sgd_stack(height, top):
    # Recursively build a tower of `height` + 1 nested SGD hyperoptimizers,
    # each created with step size `top`.
    if height == 0:
        return SGD(alpha=top)
    return SGD(alpha=top, optimizer=make_sgd_stack(height - 1, top))


def make_adam_stack(height, top=0.0000001):
    if height == 0:
        return Adam(alpha=top)
    return Adam(alpha=top, optimizer=make_adam_stack(height - 1))


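
# Illustration: make_sgd_stack(2, 0.01) builds
#   SGD(alpha=0.01, optimizer=SGD(alpha=0.01, optimizer=SGD(alpha=0.01)))
# i.e. a tower in which (presumably) each level's alpha is adjusted by the
# optimizer one level above it. make_adam_stack(h) builds the analogous Adam
# tower with alpha=1e-7 at every level.
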
def stack_test():
    for top in np.linspace(-7, 3, 20):
        for height in range(6):
            print("height =", height, "top =", top)
            opt = make_sgd_stack(height, 10 ** top)
            run(
                opt,
                "metasgd3-%d@%+.2f" % (height, top),
                {"height": height, "top": top},
                epochs=1,
                height=height,
            )
            gc.collect()


def perf_test():
    for h in range(51):
        print("height:", h)
        # opt = make_sgd_stack(h, 0.01)
        opt = make_adam_stack(h)
        run(opt, "adamperf-%d" % h, {"height": h}, epochs=1)
        gc.collect()


if __name__ == "__main__":
    try:
        os.mkdir("log")
    except FileExistsError:
        print("log/ exists already")

    surface()
    sgd_experiments()
    adam_experiments()
    stack_test()
    perf_test()