In this practical session, we will implement a Convolutional Neural Network (CNN) to classify images from the CIFAR-10 dataset. CIFAR-10 consists of 60,000 32x32 colour images in 10 classes, with 6,000 images per class; there are 50,000 training images and 10,000 test images. We will compare a traditional fully connected (dense) neural network (DNN) against CNNs of increasing size.
First, we load the necessary libraries:
# PyTorch library, provides tensor computation and deep neural networks
import torch
# Package that provides access to popular datasets and image transformations for computer vision
import torchvision
from torchvision import datasets, transforms
import torch.nn as nn # Provides classes to define and manipulate neural networks
import torch.nn.functional as F # Contains functions that do not have any parameters, such as relu, tanh, etc.
import torch.optim as optim # Package implementing various optimization algorithms
# Library for the Python programming language, adding support for large, multi-dimensional arrays and matrices.
import numpy as np
import matplotlib.pyplot as plt # Library for creating static, animated, and interactive visualizations in Python
To accelerate training, we will use a hardware accelerator when one is available: an NVIDIA GPU through CUDA, or the GPU of an Apple Silicon chip (M1/M2/M3) through the Metal Performance Shaders (MPS) backend; otherwise we fall back to the CPU.
# Set the device (cuda, mps or cpu)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print("Device used: {}".format(device))
Device used: cuda
The CIFAR-10 dataset is available in torchvision.datasets:
# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 64
# percentage of the training set to use as validation (declared here, but not used by the loaders below)
valid_size = 0.2
# convert data to torch.FloatTensor and normalize each channel from [0, 1] to [-1, 1]
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)
# Choose the training and test datasets
train_data = datasets.CIFAR10("data", train=True, download=True, transform=transform)
test_data = datasets.CIFAR10("data", train=False, download=True, transform=transform)
# prepare data loaders (combine dataset and sampler)
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, num_workers=num_workers, shuffle=False
)
Files already downloaded and verified
Files already downloaded and verified
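Note that valid_size is declared above but never used: the loaders keep the whole training set, and the "validation" metrics reported later are actually computed on the test set. If a genuine validation split is wanted, one option (a sketch, not used in the remainder of this session) is to draw disjoint index subsets with SubsetRandomSampler:
# Optional: carve a validation split out of the training set using valid_size
from torch.utils.data.sampler import SubsetRandomSampler

num_train = len(train_data)
indices = np.arange(num_train)
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]
# each sampler draws only from its own index subset (do not combine with shuffle=True)
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, num_workers=num_workers,
    sampler=SubsetRandomSampler(train_idx),
)
valid_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, num_workers=num_workers,
    sampler=SubsetRandomSampler(valid_idx),
)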
We can load the first batch and display the images:
# Load the first batch of the test data
examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)
# CIFAR-10 class names, in label order
classes = ("airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")
# plot the images in the batch, along with the corresponding labels
fig = plt.figure(figsize=(10, 4))
# display 27 images on a 3 x 9 grid
for idx in np.arange(27):
    ax = fig.add_subplot(3, 9, idx + 1, xticks=[], yticks=[])
    # un-normalize and convert from (C, H, W) to (H, W, C) for display
    img = example_data[idx].detach().numpy() / 2 + 0.5
    ax.imshow(np.transpose(img, (1, 2, 0)))
    ax.set_title(classes[example_targets[idx]])
plt.tight_layout()
plt.show()
We now define a function that trains a model and evaluates it after each epoch, together with a helper to compare the accuracy curves of several runs:
def train_n_test(model, optimizer, nepochs, criterion, fname):
    # epochs to train the model
    n_epochs = [*range(1, nepochs + 1)]
    # track the best (lowest) validation loss seen so far
    valid_loss_min = np.inf
    # lists to store losses and accuracies for later visualization
    train_losses = []
    train_counter = []
    test_losses = []
    acc_eval = []
    test_counter = [i * len(train_loader.dataset) for i in n_epochs]
    for epoch in n_epochs:
        # keep track of the validation loss
        valid_loss = 0.0
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # move tensors to GPU/MPS if CUDA/MPS is available
            data, target = data.to(device), target.to(device)
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # record the training loss and the number of examples seen so far
            train_losses.append(loss.item())
            train_counter.append(
                (batch_idx * batch_size) + ((epoch - 1) * len(train_loader.dataset))
            )
        ######################
        # validate the model #
        ######################
        # (here the test set plays the role of the validation set)
        model.eval()
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                # move tensors to GPU/MPS if CUDA/MPS is available
                data, target = data.to(device), target.to(device)
                # forward pass: compute predicted outputs by passing inputs to the model
                output = model(data)
                # calculate the batch loss
                loss = criterion(output, target)
                # accumulate the validation loss
                valid_loss += loss.item() * data.size(0)
                # count correct predictions
                pred = output.data.max(1, keepdim=True)[1]
                correct += pred.eq(target.data.view_as(pred)).sum().item()
        # calculate average losses over the epoch
        train_loss = np.mean(train_losses[(epoch - 1) * len(train_loader) :])
        valid_loss = valid_loss / len(test_loader.dataset)
        acc_eval.append(correct / len(test_loader.dataset) * 100)
        test_losses.append(valid_loss)
        # print training/validation statistics
        print(
            "Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}".format(
                epoch, train_loss, valid_loss
            )
        )
        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print(
                "Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...".format(
                    valid_loss_min, valid_loss
                )
            )
            torch.save(model.state_dict(), fname)
            valid_loss_min = valid_loss
    # save the recorded curves so they can be plotted later
    torch.save(
        {
            "test_loss": (test_counter, test_losses),
            "train_loss": (train_counter, train_losses),
            "acc_eval": acc_eval,
        },
        f"res_{fname}.pt",
    )
def plot_accuracies(fnames, names):
    num_models = len(fnames)
    blues = plt.cm.Blues(np.linspace(0, 1, num_models + 1))
    accs_eval = []
    plt.figure(figsize=(10, 5))
    for i, fname in enumerate(fnames):
        x = torch.load(f"res_{fname}.pt")
        test_counter, test_losses = x["test_loss"]
        acc_eval = x["acc_eval"]
        accs_eval.append(acc_eval)
        plt.plot(
            test_counter,
            acc_eval,
            color=blues[i + 1],
        )
    plt.legend(
        [
            f"{name} Best Accuracy: {max(acc_eval):.2f}%"
            for name, acc_eval in zip(names, accs_eval)
        ],
        loc="lower right",
    )
    for i, fname in enumerate(fnames):
        x = torch.load(f"res_{fname}.pt")
        test_counter, test_losses = x["test_loss"]
        acc_eval = x["acc_eval"]
        plt.scatter(
            [test_counter[np.argmax(acc_eval)]],
            [max(acc_eval)],
            color="black",
            zorder=+200,
        )
    plt.xlim([min(test_counter), max(test_counter)])
    plt.ylim([0, 100])
    plt.xlabel("Number of Examples Seen by the model")
    plt.ylabel("Accuracy")
    plt.annotate(
        "",
        xy=(min(test_counter) - 0.3, 105),
        xytext=(min(test_counter) - 0.3, -5),
        arrowprops=dict(arrowstyle="->"),
        annotation_clip=False,
        zorder=-100,
    )
    plt.annotate(
        "",
        xy=(max(test_counter) + 10000, -0.0),
        xytext=(min(test_counter) - 1000, -0.0),
        arrowprops=dict(arrowstyle="->"),
        annotation_clip=False,
        zorder=-100,
    )
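The cells below also call a plot(fname) helper to display the loss curves of a single run. Its definition is not shown in this section; a minimal sketch, assuming it simply reads the res_{fname}.pt dictionary saved by train_n_test, could look like:
def plot(fname):
    # hypothetical re-creation of the loss-curve helper used below
    res = torch.load(f"res_{fname}.pt")
    train_counter, train_losses = res["train_loss"]
    test_counter, test_losses = res["test_loss"]
    plt.figure(figsize=(10, 5))
    plt.plot(train_counter, train_losses, color="blue")
    plt.scatter(test_counter, test_losses, color="red", zorder=10)
    plt.legend(["Train Loss", "Validation Loss"], loc="upper right")
    plt.xlabel("Number of Examples Seen by the model")
    plt.ylabel("Cross-entropy loss")
    plt.show()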
We define the loss function; nn.CrossEntropyLoss expects raw, unnormalized scores (logits), which is why none of the models below apply a softmax in their forward pass:
criterion = nn.CrossEntropyLoss()
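As a quick illustration (not part of the original notebook), the criterion takes a batch of logits and the corresponding integer class labels and returns a scalar loss:
logits = torch.randn(4, 10)           # a batch of 4 unnormalized score vectors
targets = torch.tensor([3, 0, 9, 1])  # the corresponding class indices
print(criterion(logits, targets))     # mean cross-entropy over the batch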
First, we define a Dense Neural Network (DNN) with 3 hidden layers:
class FC(nn.Module):
    def __init__(self):
        super(FC, self).__init__()
        self.fc1 = nn.Linear(3 * 32 * 32, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 3 * 32 * 32)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x
We instantiate the DNN model and count its number of trainable parameters:
model = FC().to(device) # Create the model and move it to the device
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print("Number of parameters: ", params)
Number of parameters: 3426250
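This count can be checked by hand: each nn.Linear(n, m) contributes n * m weights plus m biases, so the total is (3 * 32 * 32 + 1) * 1024 + (1024 + 1) * 256 + (256 + 1) * 64 + (64 + 1) * 10 = 3,146,752 + 262,400 + 16,448 + 650 = 3,426,250.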
We can train the DNN model:
optimizer = optim.SGD(model.parameters(), lr=0.001) # Define the optimizer
train_n_test(model, optimizer, 30, criterion, "sgd_cifar_fc.pt")
Epoch: 1 Training Loss: 2.298408 Validation Loss: 2.293379 Validation loss decreased (inf --> 2.293379). Saving model ... Epoch: 2 Training Loss: 2.287825 Validation Loss: 2.282024 Validation loss decreased (2.293379 --> 2.282024). Saving model ... Epoch: 3 Training Loss: 2.275021 Validation Loss: 2.267214 Validation loss decreased (2.282024 --> 2.267214). Saving model ... Epoch: 4 Training Loss: 2.257316 Validation Loss: 2.245834 Validation loss decreased (2.267214 --> 2.245834). Saving model ... Epoch: 5 Training Loss: 2.231110 Validation Loss: 2.214038 Validation loss decreased (2.245834 --> 2.214038). Saving model ... Epoch: 6 Training Loss: 2.193528 Validation Loss: 2.170689 Validation loss decreased (2.214038 --> 2.170689). Saving model ... Epoch: 7 Training Loss: 2.147381 Validation Loss: 2.122519 Validation loss decreased (2.170689 --> 2.122519). Saving model ... Epoch: 8 Training Loss: 2.100531 Validation Loss: 2.076545 Validation loss decreased (2.122519 --> 2.076545). Saving model ... Epoch: 9 Training Loss: 2.057069 Validation Loss: 2.033966 Validation loss decreased (2.076545 --> 2.033966). Saving model ... Epoch: 10 Training Loss: 2.017031 Validation Loss: 1.995049 Validation loss decreased (2.033966 --> 1.995049). Saving model ... Epoch: 11 Training Loss: 1.980282 Validation Loss: 1.959829 Validation loss decreased (1.995049 --> 1.959829). Saving model ... Epoch: 12 Training Loss: 1.947010 Validation Loss: 1.929186 Validation loss decreased (1.959829 --> 1.929186). Saving model ... Epoch: 13 Training Loss: 1.917824 Validation Loss: 1.901648 Validation loss decreased (1.929186 --> 1.901648). Saving model ... Epoch: 14 Training Loss: 1.891703 Validation Loss: 1.877327 Validation loss decreased (1.901648 --> 1.877327). Saving model ... Epoch: 15 Training Loss: 1.868050 Validation Loss: 1.854632 Validation loss decreased (1.877327 --> 1.854632). Saving model ... Epoch: 16 Training Loss: 1.846225 Validation Loss: 1.834231 Validation loss decreased (1.854632 --> 1.834231). Saving model ... Epoch: 17 Training Loss: 1.826443 Validation Loss: 1.814482 Validation loss decreased (1.834231 --> 1.814482). Saving model ... Epoch: 18 Training Loss: 1.807416 Validation Loss: 1.796260 Validation loss decreased (1.814482 --> 1.796260). Saving model ... Epoch: 19 Training Loss: 1.789477 Validation Loss: 1.778376 Validation loss decreased (1.796260 --> 1.778376). Saving model ... Epoch: 20 Training Loss: 1.772407 Validation Loss: 1.762206 Validation loss decreased (1.778376 --> 1.762206). Saving model ... Epoch: 21 Training Loss: 1.756357 Validation Loss: 1.746533 Validation loss decreased (1.762206 --> 1.746533). Saving model ... Epoch: 22 Training Loss: 1.741044 Validation Loss: 1.731481 Validation loss decreased (1.746533 --> 1.731481). Saving model ... Epoch: 23 Training Loss: 1.726189 Validation Loss: 1.716993 Validation loss decreased (1.731481 --> 1.716993). Saving model ... Epoch: 24 Training Loss: 1.712934 Validation Loss: 1.703785 Validation loss decreased (1.716993 --> 1.703785). Saving model ... Epoch: 25 Training Loss: 1.699721 Validation Loss: 1.691575 Validation loss decreased (1.703785 --> 1.691575). Saving model ... Epoch: 26 Training Loss: 1.687766 Validation Loss: 1.679835 Validation loss decreased (1.691575 --> 1.679835). Saving model ... Epoch: 27 Training Loss: 1.675872 Validation Loss: 1.668598 Validation loss decreased (1.679835 --> 1.668598). Saving model ... Epoch: 28 Training Loss: 1.664840 Validation Loss: 1.657806 Validation loss decreased (1.668598 --> 1.657806). 
Saving model ... Epoch: 29 Training Loss: 1.654181 Validation Loss: 1.648067 Validation loss decreased (1.657806 --> 1.648067). Saving model ... Epoch: 30 Training Loss: 1.644267 Validation Loss: 1.638514 Validation loss decreased (1.648067 --> 1.638514). Saving model ...
plot("sgd_cifar_fc.pt")
By using the Adam optimizer, we reach better performance (note that we keep training the same model instance here, so Adam starts from the SGD-trained weights):
optimizer = optim.Adam(model.parameters(), lr=0.001) # Define the optimizer
train_n_test(model, optimizer, 30, criterion, "adam_cifar_fc.pt")
Epoch: 1 Training Loss: 1.647408 Validation Loss: 1.517360 Validation loss decreased (inf --> 1.517360). Saving model ... Epoch: 2 Training Loss: 1.452193 Validation Loss: 1.451674 Validation loss decreased (1.517360 --> 1.451674). Saving model ... Epoch: 3 Training Loss: 1.342919 Validation Loss: 1.386244 Validation loss decreased (1.451674 --> 1.386244). Saving model ... Epoch: 4 Training Loss: 1.251634 Validation Loss: 1.386287 Epoch: 5 Training Loss: 1.171663 Validation Loss: 1.347294 Validation loss decreased (1.386244 --> 1.347294). Saving model ... Epoch: 6 Training Loss: 1.097073 Validation Loss: 1.361448 Epoch: 7 Training Loss: 1.026438 Validation Loss: 1.386757 Epoch: 8 Training Loss: 0.948088 Validation Loss: 1.403232 Epoch: 9 Training Loss: 0.882839 Validation Loss: 1.444463 Epoch: 10 Training Loss: 0.814854 Validation Loss: 1.502697 Epoch: 11 Training Loss: 0.756677 Validation Loss: 1.604978 Epoch: 12 Training Loss: 0.703948 Validation Loss: 1.698845 Epoch: 13 Training Loss: 0.654056 Validation Loss: 1.726790 Epoch: 14 Training Loss: 0.605096 Validation Loss: 1.792672 Epoch: 15 Training Loss: 0.566104 Validation Loss: 1.950972 Epoch: 16 Training Loss: 0.538840 Validation Loss: 1.945389 Epoch: 17 Training Loss: 0.490365 Validation Loss: 2.059897 Epoch: 18 Training Loss: 0.469634 Validation Loss: 2.034726 Epoch: 19 Training Loss: 0.447501 Validation Loss: 2.080638 Epoch: 20 Training Loss: 0.423825 Validation Loss: 2.313976 Epoch: 21 Training Loss: 0.405261 Validation Loss: 2.340054 Epoch: 22 Training Loss: 0.384436 Validation Loss: 2.416563 Epoch: 23 Training Loss: 0.364228 Validation Loss: 2.549311 Epoch: 24 Training Loss: 0.361097 Validation Loss: 2.448377 Epoch: 25 Training Loss: 0.342479 Validation Loss: 2.533963 Epoch: 26 Training Loss: 0.328682 Validation Loss: 2.579750 Epoch: 27 Training Loss: 0.310609 Validation Loss: 2.578867 Epoch: 28 Training Loss: 0.304096 Validation Loss: 2.731312 Epoch: 29 Training Loss: 0.296812 Validation Loss: 2.816342 Epoch: 30 Training Loss: 0.280095 Validation Loss: 2.909314
plot("adam_cifar_fc.pt")
By comparing the two optimizers, we can see that Adam converges much faster than SGD, but it also starts to overfit after only a few epochs (its validation loss rises while the training loss keeps decreasing):
plot_accuracies(["sgd_cifar_fc.pt", "adam_cifar_fc.pt"], ["SGD", "Adam"])
We now consider a small CNN with 2 convolutional layers and 3 fully connected layers:
class CNN(nn.Module):
    def __init__(self):  # Define the layers of the network
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)  # 3 input channels, 6 output channels, 5x5 kernel
        self.pool = nn.MaxPool2d(2, 2)  # 2x2 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)  # 6 input channels, 16 output channels, 5x5 kernel
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 16x5x5 input features, 120 output features
        self.fc2 = nn.Linear(120, 84)  # 120 input features, 84 output features
        self.fc3 = nn.Linear(84, 10)  # 84 input features, 10 output features

    def forward(self, x):  # Define the forward pass
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
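Why 16 * 5 * 5 at the flatten step? Each 5x5 convolution without padding shrinks the spatial size by 4 and each 2x2 pooling halves it: 32 -> 28 -> 14 -> 10 -> 5, leaving 16 channels of 5x5 feature maps. A quick sanity check (a sketch, not part of the original notebook):
m = CNN()
dummy = torch.randn(1, 3, 32, 32)
h = m.pool(F.relu(m.conv1(dummy)))  # -> (1, 6, 14, 14)
h = m.pool(F.relu(m.conv2(h)))      # -> (1, 16, 5, 5)
print(h.shape)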
We instantiate the CNN model, create the optimizer, and count the number of trainable parameters:
model = CNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001) # Define the optimizer
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print("Number of parameters: ", params)
Number of parameters: 62006
The CNN has far fewer parameters than the DNN!
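Indeed, the convolutions are cheap and the fully connected part dominates: conv1 has (3 * 5 * 5 + 1) * 6 = 456 parameters, conv2 has (6 * 5 * 5 + 1) * 16 = 2,416, and the three linear layers have (400 + 1) * 120 + (120 + 1) * 84 + (84 + 1) * 10 = 48,120 + 10,164 + 850, for a total of 62,006, roughly 55 times fewer than the fully connected network.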
We train the model using Adam:
model = CNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_n_test(model, optimizer, 30, criterion, "cifar_cnn.pt")
Epoch: 1 Training Loss: 1.659091 Validation Loss: 1.426273 Validation loss decreased (inf --> 1.426273). Saving model ... Epoch: 2 Training Loss: 1.348434 Validation Loss: 1.260200 Validation loss decreased (1.426273 --> 1.260200). Saving model ... Epoch: 3 Training Loss: 1.211291 Validation Loss: 1.190875 Validation loss decreased (1.260200 --> 1.190875). Saving model ... Epoch: 4 Training Loss: 1.124590 Validation Loss: 1.120725 Validation loss decreased (1.190875 --> 1.120725). Saving model ... Epoch: 5 Training Loss: 1.057546 Validation Loss: 1.075490 Validation loss decreased (1.120725 --> 1.075490). Saving model ... Epoch: 6 Training Loss: 1.002188 Validation Loss: 1.051245 Validation loss decreased (1.075490 --> 1.051245). Saving model ... Epoch: 7 Training Loss: 0.956595 Validation Loss: 1.016638 Validation loss decreased (1.051245 --> 1.016638). Saving model ... Epoch: 8 Training Loss: 0.913452 Validation Loss: 1.027900 Epoch: 9 Training Loss: 0.881005 Validation Loss: 1.008607 Validation loss decreased (1.016638 --> 1.008607). Saving model ... Epoch: 10 Training Loss: 0.849762 Validation Loss: 1.013531 Epoch: 11 Training Loss: 0.815056 Validation Loss: 1.006732 Validation loss decreased (1.008607 --> 1.006732). Saving model ... Epoch: 12 Training Loss: 0.789146 Validation Loss: 0.995275 Validation loss decreased (1.006732 --> 0.995275). Saving model ... Epoch: 13 Training Loss: 0.764450 Validation Loss: 1.000758 Epoch: 14 Training Loss: 0.739543 Validation Loss: 1.006464 Epoch: 15 Training Loss: 0.713389 Validation Loss: 1.030374 Epoch: 16 Training Loss: 0.690429 Validation Loss: 1.067279 Epoch: 17 Training Loss: 0.669459 Validation Loss: 1.065921 Epoch: 18 Training Loss: 0.647894 Validation Loss: 1.072320 Epoch: 19 Training Loss: 0.630330 Validation Loss: 1.102415 Epoch: 20 Training Loss: 0.606180 Validation Loss: 1.125398 Epoch: 21 Training Loss: 0.594327 Validation Loss: 1.186328 Epoch: 22 Training Loss: 0.578217 Validation Loss: 1.166590 Epoch: 23 Training Loss: 0.556354 Validation Loss: 1.161449 Epoch: 24 Training Loss: 0.543732 Validation Loss: 1.207963 Epoch: 25 Training Loss: 0.526854 Validation Loss: 1.209673 Epoch: 26 Training Loss: 0.509426 Validation Loss: 1.229373 Epoch: 27 Training Loss: 0.498067 Validation Loss: 1.277879 Epoch: 28 Training Loss: 0.481433 Validation Loss: 1.369175 Epoch: 29 Training Loss: 0.471013 Validation Loss: 1.357426 Epoch: 30 Training Loss: 0.460048 Validation Loss: 1.385186
plot("cifar_cnn.pt")
What can we observe?
We now consider a larger and wider model with optional batch normalization and dropout:
class LargeCNN(nn.Module):
    def __init__(self, batch_norm=False, dropout=False):
        super(LargeCNN, self).__init__()
        self.conv_layer1 = nn.Sequential(
            # Conv Layer block 1
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32) if batch_norm else nn.Identity(),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv_layer2 = nn.Sequential(
            # Conv Layer block 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128) if batch_norm else nn.Identity(),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.15) if dropout else nn.Identity(),
        )
        self.conv_layer3 = nn.Sequential(
            # Conv Layer block 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256) if batch_norm else nn.Identity(),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.15) if dropout else nn.Identity(),
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),  # note: this dropout is applied regardless of the dropout flag
            nn.Linear(512, 10),
        )

    def conv_out(self, x, c):
        # return the feature map of channel c after the first conv block
        return self.conv_layer1(x)[:, c, :, :]

    def forward(self, x):
        """Perform forward."""
        # conv layers
        x = self.conv_layer1(x)
        x = self.conv_layer2(x)
        x = self.conv_layer3(x)
        # flatten
        x = x.view(x.size(0), -1)
        # fc layer
        x = self.fc_layer(x)
        return x
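The first linear layer takes 4096 input features because the three 2x2 poolings reduce the 32x32 input to a 4x4 spatial map (32 -> 16 -> 8 -> 4), and conv_layer3 outputs 256 channels: 256 * 4 * 4 = 4096.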
We instantiate the large CNN (here without batch normalization or dropout), create the optimizer, and count the number of parameters:
model = LargeCNN(batch_norm=False, dropout=False).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print("Number of parameters: ", params)
Number of parameters: 5851338
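Most of these parameters sit in the classifier head: the 4096 -> 1024 linear layer alone has (4096 + 1) * 1024 = 4,195,328 of them, while the six convolutions together account for roughly 1.1 million.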
train_n_test(model, optimizer, 30, criterion, "cifar_large_cnn.pt")
Epoch: 1 Training Loss: 1.599818 Validation Loss: 1.204596 Validation loss decreased (inf --> 1.204596). Saving model ... Epoch: 2 Training Loss: 0.997227 Validation Loss: 0.885379 Validation loss decreased (1.204596 --> 0.885379). Saving model ... Epoch: 3 Training Loss: 0.749767 Validation Loss: 0.766493 Validation loss decreased (0.885379 --> 0.766493). Saving model ... Epoch: 4 Training Loss: 0.608492 Validation Loss: 0.732680 Validation loss decreased (0.766493 --> 0.732680). Saving model ... Epoch: 5 Training Loss: 0.495372 Validation Loss: 0.702470 Validation loss decreased (0.732680 --> 0.702470). Saving model ... Epoch: 6 Training Loss: 0.392042 Validation Loss: 0.738384 Epoch: 7 Training Loss: 0.308529 Validation Loss: 0.800675 Epoch: 8 Training Loss: 0.238643 Validation Loss: 0.824499 Epoch: 9 Training Loss: 0.197688 Validation Loss: 0.917873 Epoch: 10 Training Loss: 0.163779 Validation Loss: 0.975063 Epoch: 11 Training Loss: 0.152591 Validation Loss: 1.097368 Epoch: 12 Training Loss: 0.123191 Validation Loss: 1.182138 Epoch: 13 Training Loss: 0.113065 Validation Loss: 1.235141 Epoch: 14 Training Loss: 0.118290 Validation Loss: 1.209514 Epoch: 15 Training Loss: 0.101396 Validation Loss: 1.232565 Epoch: 16 Training Loss: 0.099923 Validation Loss: 1.191586 Epoch: 17 Training Loss: 0.095028 Validation Loss: 1.341417 Epoch: 18 Training Loss: 0.090412 Validation Loss: 1.343380 Epoch: 19 Training Loss: 0.086220 Validation Loss: 1.343565 Epoch: 20 Training Loss: 0.084747 Validation Loss: 1.468028 Epoch: 21 Training Loss: 0.100639 Validation Loss: 1.442234 Epoch: 22 Training Loss: 0.080292 Validation Loss: 1.556585 Epoch: 23 Training Loss: 0.090976 Validation Loss: 1.438859 Epoch: 24 Training Loss: 0.083141 Validation Loss: 1.533127 Epoch: 25 Training Loss: 0.083212 Validation Loss: 1.566200 Epoch: 26 Training Loss: 0.078648 Validation Loss: 1.532702 Epoch: 27 Training Loss: 0.084718 Validation Loss: 1.466748 Epoch: 28 Training Loss: 0.075996 Validation Loss: 1.584655 Epoch: 29 Training Loss: 0.074190 Validation Loss: 1.621887 Epoch: 30 Training Loss: 0.074171 Validation Loss: 1.659567
plot("cifar_large_cnn.pt")
If we use batch normalization, we can stabilize the training:
model = LargeCNN(batch_norm=True, dropout=False).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_n_test(model, optimizer, 30, criterion, "cifar_large_cnn_bn.pt")
Epoch: 1 Training Loss: 1.377527 Validation Loss: 1.151627 Validation loss decreased (inf --> 1.151627). Saving model ... Epoch: 2 Training Loss: 0.894417 Validation Loss: 0.934475 Validation loss decreased (1.151627 --> 0.934475). Saving model ... Epoch: 3 Training Loss: 0.721287 Validation Loss: 0.741299 Validation loss decreased (0.934475 --> 0.741299). Saving model ... Epoch: 4 Training Loss: 0.601858 Validation Loss: 0.736946 Validation loss decreased (0.741299 --> 0.736946). Saving model ... Epoch: 5 Training Loss: 0.519130 Validation Loss: 0.658583 Validation loss decreased (0.736946 --> 0.658583). Saving model ... Epoch: 6 Training Loss: 0.438963 Validation Loss: 0.646755 Validation loss decreased (0.658583 --> 0.646755). Saving model ... Epoch: 7 Training Loss: 0.375962 Validation Loss: 0.639045 Validation loss decreased (0.646755 --> 0.639045). Saving model ... Epoch: 8 Training Loss: 0.320418 Validation Loss: 0.737986 Epoch: 9 Training Loss: 0.276126 Validation Loss: 0.721208 Epoch: 10 Training Loss: 0.230144 Validation Loss: 0.750623 Epoch: 11 Training Loss: 0.198359 Validation Loss: 0.772792 Epoch: 12 Training Loss: 0.177253 Validation Loss: 0.872785 Epoch: 13 Training Loss: 0.153021 Validation Loss: 0.852632 Epoch: 14 Training Loss: 0.136633 Validation Loss: 0.835774 Epoch: 15 Training Loss: 0.128613 Validation Loss: 0.943488 Epoch: 16 Training Loss: 0.113854 Validation Loss: 0.984285 Epoch: 17 Training Loss: 0.113466 Validation Loss: 0.868559 Epoch: 18 Training Loss: 0.091723 Validation Loss: 0.952815 Epoch: 19 Training Loss: 0.094924 Validation Loss: 1.086942 Epoch: 20 Training Loss: 0.087029 Validation Loss: 1.085407 Epoch: 21 Training Loss: 0.086064 Validation Loss: 1.054228 Epoch: 22 Training Loss: 0.071511 Validation Loss: 1.157580 Epoch: 23 Training Loss: 0.078778 Validation Loss: 1.205795 Epoch: 24 Training Loss: 0.074825 Validation Loss: 1.140467 Epoch: 25 Training Loss: 0.068187 Validation Loss: 1.145609 Epoch: 26 Training Loss: 0.069380 Validation Loss: 1.043554 Epoch: 27 Training Loss: 0.061416 Validation Loss: 1.212794 Epoch: 28 Training Loss: 0.066645 Validation Loss: 1.144456 Epoch: 29 Training Loss: 0.062893 Validation Loss: 1.150404 Epoch: 30 Training Loss: 0.059742 Validation Loss: 1.169297
plot("cifar_large_cnn_bn.pt")
Dropout can also help to avoid overfitting:
model = LargeCNN(batch_norm=True, dropout=True).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_n_test(model, optimizer, 30, criterion, "cifar_large_cnn_bn_dropout.pt")
Epoch: 1 Training Loss: 1.434861 Validation Loss: 1.226824 Validation loss decreased (inf --> 1.226824). Saving model ... Epoch: 2 Training Loss: 1.004135 Validation Loss: 0.917697 Validation loss decreased (1.226824 --> 0.917697). Saving model ... Epoch: 3 Training Loss: 0.816937 Validation Loss: 0.844841 Validation loss decreased (0.917697 --> 0.844841). Saving model ... Epoch: 4 Training Loss: 0.708206 Validation Loss: 0.739159 Validation loss decreased (0.844841 --> 0.739159). Saving model ... Epoch: 5 Training Loss: 0.627765 Validation Loss: 0.668541 Validation loss decreased (0.739159 --> 0.668541). Saving model ... Epoch: 6 Training Loss: 0.567008 Validation Loss: 0.608081 Validation loss decreased (0.668541 --> 0.608081). Saving model ... Epoch: 7 Training Loss: 0.509372 Validation Loss: 0.669559 Epoch: 8 Training Loss: 0.468125 Validation Loss: 0.591877 Validation loss decreased (0.608081 --> 0.591877). Saving model ... Epoch: 9 Training Loss: 0.419433 Validation Loss: 0.595471 Epoch: 10 Training Loss: 0.381236 Validation Loss: 0.614316 Epoch: 11 Training Loss: 0.357222 Validation Loss: 0.590553 Validation loss decreased (0.591877 --> 0.590553). Saving model ... Epoch: 12 Training Loss: 0.328914 Validation Loss: 0.616229 Epoch: 13 Training Loss: 0.305198 Validation Loss: 0.588595 Validation loss decreased (0.590553 --> 0.588595). Saving model ... Epoch: 14 Training Loss: 0.292277 Validation Loss: 0.583493 Validation loss decreased (0.588595 --> 0.583493). Saving model ... Epoch: 15 Training Loss: 0.259140 Validation Loss: 0.612815 Epoch: 16 Training Loss: 0.243930 Validation Loss: 0.642086 Epoch: 17 Training Loss: 0.228200 Validation Loss: 0.593261 Epoch: 18 Training Loss: 0.218168 Validation Loss: 0.592078 Epoch: 19 Training Loss: 0.203393 Validation Loss: 0.623051 Epoch: 20 Training Loss: 0.191121 Validation Loss: 0.625495 Epoch: 21 Training Loss: 0.185056 Validation Loss: 0.667155 Epoch: 22 Training Loss: 0.174955 Validation Loss: 0.679312 Epoch: 23 Training Loss: 0.169246 Validation Loss: 0.672484 Epoch: 24 Training Loss: 0.158391 Validation Loss: 0.649721 Epoch: 25 Training Loss: 0.161155 Validation Loss: 0.685120 Epoch: 26 Training Loss: 0.147813 Validation Loss: 0.688835 Epoch: 27 Training Loss: 0.148170 Validation Loss: 0.652558 Epoch: 28 Training Loss: 0.136856 Validation Loss: 0.724329 Epoch: 29 Training Loss: 0.138581 Validation Loss: 0.697110 Epoch: 30 Training Loss: 0.135540 Validation Loss: 0.716323
plot("cifar_large_cnn_bn_dropout.pt")
plot_accuracies(
    [
        "cifar_large_cnn.pt",
        "cifar_large_cnn_bn.pt",
        "cifar_large_cnn_bn_dropout.pt",
    ],
    ["Large CNN", "Large CNN BN", "Large CNN BN Dropout"],
)
plot_accuracies(
    ["adam_cifar_fc.pt", "cifar_cnn.pt", "cifar_large_cnn_bn_dropout.pt"],
    ["FC", "CNN", "Large CNN"],
)