The Fish and the (Alex)Net
Implementation of AlexNet from scratch using PyTorch framework on a custom Dataset
- From Kaggle to Colab
- Libraries
- Data Processing
- Helper functions
- AlexNet
- Setup
- 1. Code:
- 2. Architecture:
From Kaggle to Colab
- Follow the steps described in the previous blog post.
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import pandas as pd
import random
import os
import matplotlib.pyplot as plt
import PIL
from PIL import Image
import time
import seaborn as sns
import glob
from pathlib import Path
# Seed torch and NumPy up front; train_test_split below is called without a
# random_state, so it draws from NumPy's global RNG.
torch.manual_seed(1)
np.random.seed(1)

data_path = Path.cwd()/'Fish_Dataset/Fish_Dataset'

# Path for all the files in a 'png' format.
# Sorted so the file order (and therefore the seeded split) does not depend
# on the order in which the filesystem happens to list the files.
image_path = sorted(data_path.glob('**/*.png'))

# Separate Segmented from Non-Segmented Images
# (paths containing 'GT' are treated as segmented; the class name is taken
# from the third-from-last path component).
non_segmented_images = [img for img in image_path if 'GT' not in str(img)]
labels_non_segment = [img.parts[-3] for img in non_segmented_images]
segmented_images = [img for img in image_path if 'GT' in str(img)]
lables_segment = [img.parts[-3] for img in segmented_images]

# Sorted instead of raw set order: iteration order of a set of strings can
# change between interpreter runs, which would silently change which integer
# each class maps to.
classes = sorted(set(lables_segment))

# Convert String Labels to int
int_classes = {fish: i for i, fish in enumerate(classes)}
lables = [int_classes[lable] for lable in labels_non_segment]

image_data = pd.DataFrame({'Path': non_segmented_images,
                           'labels': lables})

# 80/20 train/test split, then 80/20 train/validation split of the remainder.
train, test, train_labels, test_labels = train_test_split(
    image_data.Path, image_data.labels, test_size=0.2, shuffle=True)
train, val, train_labels, val_labels = train_test_split(
    train, train_labels, test_size=0.2, shuffle=True)
class FishDataset(Dataset):
    """
    Loads Images using pillow and applies transformations.

    Args:
        images: pandas Series of image file paths (indexed with ``.iloc``).
        labels: pandas Series of labels, positionally aligned with ``images``.
        transform: optional callable applied to each loaded PIL image.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        # Open inside a context manager so the file handle is released, and
        # convert to RGB so every sample has exactly three channels no matter
        # how the PNG was saved (grayscale, palette, RGBA, ...).
        with Image.open(self.images.iloc[idx]) as handle:
            img = handle.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        label = self.labels.iloc[idx]
        return img, label
### FISH DATASET
##########################

# Training pipeline: resize, convert to tensor, random flips and hue jitter
# for augmentation, then normalize each channel with mean 0.5 / std 0.5.
_train_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
train_transform = transforms.Compose(_train_steps)

# Evaluation pipeline: same resize/normalize, but no random augmentation.
_eval_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
test_transforms = transforms.Compose(_eval_steps)
def get_loaders(train, train_labels, val, val_labels, test, test_labels,
                batch_size, num_workers, train_transform, test_transform):
    """
    Returns Train,Validation and Test Loaders.

    Args:
        train, val, test: image paths for each split.
        train_labels, val_labels, test_labels: labels for each split.
        batch_size: batch size shared by all three loaders.
        num_workers: worker count shared by all three loaders.
        train_transform: transform applied to training images.
        test_transform: transform applied to validation and test images.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles.
    """
    train_ds = FishDataset(images=train, labels=train_labels,
                           transform=train_transform)
    # Use the `test_transform` argument rather than reaching for a
    # module-level global, so the caller actually controls the evaluation
    # preprocessing.
    val_ds = FishDataset(images=val, labels=val_labels,
                         transform=test_transform)
    test_ds = FishDataset(images=test, labels=test_labels,
                          transform=test_transform)

    train_loader = DataLoader(train_ds, batch_size=batch_size,
                              num_workers=num_workers, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size,
                            num_workers=num_workers, shuffle=False)
    test_loader = DataLoader(test_ds, batch_size=batch_size,
                             num_workers=num_workers, shuffle=False)
    return train_loader, val_loader, test_loader
def set_all_seeds(seed):
    """Seed Python's, NumPy's and PyTorch's (CPU and CUDA) random generators.

    Also exports the seed through the ``PL_GLOBAL_SEED`` environment variable.
    """
    os.environ["PL_GLOBAL_SEED"] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
def compute_accuracy(model, data_loader, device):
    """Compute Accuracy for the provided Data Loader

    Args:
        model: ``nn.Module`` mapping a feature batch to per-class logits.
        data_loader: iterable yielding ``(features, targets)`` batches.
        device: device the batches are moved to before the forward pass.

    Returns:
        0-dim float tensor holding the accuracy in percent.

    Raises:
        ValueError: if ``data_loader`` yields no examples.
    """
    # The original wrote `model.eval` without parentheses, which is a no-op;
    # actually switch to eval mode so dropout is disabled, and put the model
    # back into whatever mode the caller had it in afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            correct_pred = torch.zeros((), dtype=torch.long, device=device)
            num_examples = 0
            for features, targets in data_loader:
                features = features.to(device)
                targets = targets.to(device)
                logits = model(features)
                predicted_labels = torch.argmax(logits, dim=1)
                num_examples += targets.size(0)
                correct_pred += (predicted_labels == targets).sum()
    finally:
        model.train(was_training)

    if num_examples == 0:
        raise ValueError("`data_loader` yielded no examples")
    return correct_pred.float() / num_examples * 100
class UnNormalize(object):
    """Undo a per-channel ``Normalize(mean, std)`` on an image tensor.

    The tensor is modified in place and also returned.
    """

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, tensor):
        channel_stats = zip(self.mean, self.std)
        # Iterating the tensor walks its first dimension; each slice is a
        # view, so the in-place ops below write back into `tensor`.
        for channel, (channel_mean, channel_std) in zip(tensor, channel_stats):
            channel.mul_(channel_std).add_(channel_mean)
        return tensor
def plot_training_loss(minibatch_loss_list, num_epochs, iter_per_epoch,
                       results_dir=None, averaging_iterations=100):
    """Plot the per-minibatch training loss together with its running average.

    A second x-axis below the iteration axis shows epochs (every tenth epoch
    is ticked). When ``results_dir`` is given, the figure is saved there as
    ``plot_training_loss.pdf``.
    """
    plt.figure()
    loss_ax = plt.subplot(1, 1, 1)
    iterations = range(len(minibatch_loss_list))
    loss_ax.plot(iterations, (minibatch_loss_list), label='Minibatch Loss')

    # For long runs, scale the y-axis from the losses after the first 1000
    # iterations only.
    if len(minibatch_loss_list) > 1000:
        y_top = np.max(minibatch_loss_list[1000:])*1.5
        loss_ax.set_ylim([0, y_top])
    loss_ax.set_xlabel('Iterations')
    loss_ax.set_ylabel('Loss')

    # Moving average over `averaging_iterations` minibatches.
    window = np.ones(averaging_iterations,)/averaging_iterations
    running_average = np.convolve(minibatch_loss_list, window, mode='valid')
    loss_ax.plot(running_average, label='Running Average')
    loss_ax.legend()

    ###################
    # Set second x-axis
    ###################
    epoch_ax = loss_ax.twiny()
    epoch_labels = list(range(num_epochs+1))
    epoch_positions = [epoch*iter_per_epoch for epoch in epoch_labels]

    epoch_ax.set_xticks(epoch_positions[::10])
    epoch_ax.set_xticklabels(epoch_labels[::10])

    epoch_ax.xaxis.set_ticks_position('bottom')
    epoch_ax.xaxis.set_label_position('bottom')
    epoch_ax.spines['bottom'].set_position(('outward', 45))
    epoch_ax.set_xlabel('Epochs')
    epoch_ax.set_xlim(loss_ax.get_xlim())
    ###################

    plt.tight_layout()

    if results_dir is None:
        return
    output_file = os.path.join(results_dir, 'plot_training_loss.pdf')
    plt.savefig(output_file)
def plot_accuracy(train_acc_list, valid_acc_list, results_dir):
    """Plot per-epoch training and validation accuracy on the same axes.

    When ``results_dir`` is not ``None``, the figure is saved there as
    ``plot_acc_training_validation.pdf``.
    """
    num_epochs = len(train_acc_list)
    epochs = np.arange(1, num_epochs+1)

    curves = ((train_acc_list, 'Training'), (valid_acc_list, 'Validation'))
    for accuracies, curve_label in curves:
        plt.plot(np.arange(1, num_epochs+1), accuracies, label=curve_label)

    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.tight_layout()

    if results_dir is None:
        return
    output_file = os.path.join(results_dir,
                               'plot_acc_training_validation.pdf')
    plt.savefig(output_file)
def show_examples(model, data_loader, unnormalizer=None, class_dict=None):
    """Visualize the predictions

    Runs the model on the first batch from ``data_loader`` and shows up to 15
    images in a 3x5 grid, each titled with predicted (P) and true (T) label.

    Args:
        model: ``nn.Module`` producing per-class logits.
        data_loader: iterable yielding ``(features, targets)`` batches with
            features laid out as (N, C, H, W).
        unnormalizer: optional callable applied to each image before display.
        class_dict: optional mapping from integer label to display name.

    Raises:
        ValueError: if ``data_loader`` yields no batches.
    """
    # Only the first batch is visualized.
    try:
        features, targets = next(iter(data_loader))
    except StopIteration:
        raise ValueError("`data_loader` yielded no batches") from None

    # Run the forward pass on whatever device the model lives on; the batch
    # coming out of the loader is not moved anywhere by this function's caller.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else features.device
    with torch.no_grad():
        logits = model(features.to(device))
        predictions = torch.argmax(logits, dim=1).cpu()

    # Work on a CPU copy so the in-place un-normalization below never touches
    # the caller's data.
    images = features.detach().cpu().clone()
    if unnormalizer is not None:
        for idx in range(images.shape[0]):
            images[idx] = unnormalizer(images[idx])

    # NCHW -> NHWC, the layout imshow expects.
    nhwc_img = images.permute(0, 2, 3, 1).numpy()
    is_grayscale = nhwc_img.shape[-1] == 1
    if is_grayscale:
        nhwc_img = np.squeeze(nhwc_img, axis=3)

    fig, axes = plt.subplots(nrows=3, ncols=5,
                             sharex=True, sharey=True)

    for idx, ax in enumerate(axes.ravel()):
        ax.axison = False
        # A batch smaller than the grid leaves the remaining cells blank.
        if idx >= len(nhwc_img):
            continue
        if is_grayscale:
            ax.imshow(nhwc_img[idx], cmap='binary')
        else:
            ax.imshow(nhwc_img[idx])
        if class_dict is not None:
            ax.title.set_text(f'P: {class_dict[predictions[idx].item()]}'
                              f'\nT: {class_dict[targets[idx].item()]}')
        else:
            ax.title.set_text(f'P: {predictions[idx]} | T: {targets[idx]}')

    plt.tight_layout()
    plt.show()
### SETTINGS
##########################
RANDOM_SEED = 123
BATCH_SIZE = 64
NUM_EPOCHS = 10
WORKERS = 2

# Use the first GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:0')
else:
    DEVICE = torch.device('cpu')

set_all_seeds(RANDOM_SEED)

# Build the three loaders from the splits and transforms defined earlier.
train_loader, val_loader, test_loader = get_loaders(
    train=train,
    train_labels=train_labels,
    val=val,
    val_labels=val_labels,
    test=test,
    test_labels=test_labels,
    batch_size=BATCH_SIZE,
    num_workers=WORKERS,
    train_transform=train_transform,
    test_transform=test_transforms,
)
AlexNet
One of the most groundbreaking events in the DL community was the advent of AlexNet, which was head and shoulders above every other solution submitted to the ImageNet 2012 Challenge.
AlexNet is a variant of CNN which, because of its success in the ImageNet 2012 Challenge, showcased the ability of neural nets to beat manually crafted models.
Architecture
Comparison of the architectures of LeNet and AlexNet by CMG Lee using data from http://d2l.ai/chapter_convolutional-neural-networks/lenet.html and http://d2l.ai/chapter_convolutional-modern/alexnet.html .
-
One can observe that while LeNet-5 is deep with 5 layers(2 Convolutions and 3 fully connected layers), AlexNet is much deeper with 8 layers(5 Convolution Layers and 3 fully connected ones). From an architectural point of view, both the networks are similar.
-
AlexNet resorts to ReLU as an activation function whereas LeNet used sigmoid.
-
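In LeNet the pooling stride equals the pooling kernel size, so the pooling windows do not overlap; AlexNet instead uses overlapping pooling (3x3 windows with a stride of 2).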
-
The AlexNet architecture also employed dropout as a regularization technique whereas LeNet relied on weight decay.
class AlexNet(nn.Module):
    """
    AlexNet as described in
    "ImageNet Classification with Deep Convolutional Neural Networks" by Alex Krizhevsky et al.
    See: https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf

    The whole network (convolutions, pooling, flatten and fully-connected
    head) lives in a single ``nn.Sequential`` stored as ``self.layers``.
    ``forward`` returns raw logits of shape (batch, num_classes).
    """

    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

        feature_extractor = [
            # Large 11x11 window with stride 4: captures big objects while
            # sharply shrinking height and width. Far more output channels
            # than LeNet.
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Smaller 5x5 window; padding 2 keeps height and width unchanged
            # while the channel count grows.
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Three back-to-back 3x3 convolutions with no pooling in between;
            # channels grow and then drop back to 256 in the last one.
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Flatten(),
        ]

        classifier = [
            # Fully-connected head, several times wider than LeNet's, with
            # dropout to mitigate overfitting. 6400 = 256 channels * 5 * 5,
            # which is what the layers above produce for a 224x224 input.
            nn.Linear(6400, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            # Output layer: one logit per class (`num_classes` instead of the
            # 1000 ImageNet classes used in the paper).
            nn.Linear(4096, num_classes),
        ]

        self.layers = nn.Sequential(*feature_extractor, *classifier)

    def forward(self, x):
        return self.layers(x)
# Sanity check: push one random 224x224 RGB image through the network layer
# by layer and print the output shape after each layer.
X = torch.randn(1, 3, 224, 224)
net = AlexNet(num_classes=9)
for layer in net.layers:
    X = layer(X)
    layer_name = layer.__class__.__name__
    print(layer_name, f"\t output shape: {X.shape}")
model = AlexNet(num_classes=9)
model = model.to(DEVICE)

# Optimizer
optimizer = torch.optim.SGD(model.parameters(), momentum=0.9, lr=0.001)

logging_interval = 50

# Quantity the plateau scheduler monitors: 'minibatch_loss' or 'valid_acc'.
scheduler_on = 'minibatch_loss'

# Scheduler
# The plateau direction must match the monitored quantity: an accuracy should
# go up ('max'), a loss should go down ('min'). Monitoring the loss with
# mode='max' would cut the learning rate whenever the loss stops *increasing*.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.1,
    mode='max' if scheduler_on == 'valid_acc' else 'min',
    verbose=True)
start_time = time.time()
minibatch_loss_list, train_acc_list, valid_acc_list = [], [], []

for epoch in range(NUM_EPOCHS):
    # Start Training
    model.train()
    for batch_idx, (features, target) in enumerate(train_loader):
        features = features.to(DEVICE)
        targets = target.to(DEVICE)

        # Forward and BackPropagation
        logits = model(features)
        loss = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        loss.backward()

        # Update Model Parameters
        optimizer.step()

        ## LOGGING
        minibatch_loss_list.append(loss.item())
        if not batch_idx % logging_interval:
            print(f"Epoch = {epoch+1:03d}/{NUM_EPOCHS:03d}"
                  f"| Batch {batch_idx:04d}/{len(train_loader):04d}"
                  f"| Loss: {loss:.4f}")

    ## Validation
    model.eval()
    with torch.no_grad():
        # Store plain Python floats rather than (possibly GPU-resident)
        # tensors so the history lists can be plotted directly.
        train_acc = float(compute_accuracy(model, train_loader, DEVICE))
        valid_acc = float(compute_accuracy(model, val_loader, DEVICE))
    print(f'Epoch: {epoch+1}/{NUM_EPOCHS:03d} '
          f'| Train: {train_acc :.2f}% '
          f'| Validation: {valid_acc :.2f}%')
    train_acc_list.append(train_acc)
    valid_acc_list.append(valid_acc)

    elapsed = (time.time() - start_time)/60
    print(f'Time elapsed: {elapsed:.2f} min')

    if scheduler is not None:
        # `elif` (not a second `if`): otherwise the "valid_acc" choice falls
        # through to the `else` branch and raises even though it is valid.
        if scheduler_on == "valid_acc":
            scheduler.step(valid_acc_list[-1])
        elif scheduler_on == 'minibatch_loss':
            scheduler.step(minibatch_loss_list[-1])
        else:
            raise ValueError("Invalid `scheduler_on` choice")

total_elapsed = (time.time() - start_time)/60
print(f'Total Training Time: {total_elapsed:.2f} min')
# Evaluate the trained model once on the held-out test split.
test_acc = compute_accuracy(model, test_loader, DEVICE)
print(f"Test accuracy: {test_acc:0.3f}")
- We covered steps for constructing a complete DL Pipeline i.e., from fetching the data to using the Model for prediction on unseen Data.
- In short, Test accuracy is 11 %
- Training time is approx. 32 minutes
-
Well, the results on our custom dataset are not so great. Then again, AlexNet was designed specifically for the ImageNet challenge.
-
AlexNet was one of the very first deep neural networks, and after it we saw how important architecture is for increasing prediction accuracy.
References:
1. Code:
- Sebastian Raschka's code, in a plug-and-play format, was super helpful.
2. Architecture:
ImageNet Classification with Deep Convolutional Neural Networks by Alex Krizhevsky et al.