From Kaggle to Colab

Follow the steps described in the earlier blog post.

Libraries

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import pandas as pd
import random
import os
import matplotlib.pyplot as plt
import PIL
from PIL import Image

import time
import seaborn as sns
import glob
from pathlib import Path
torch.manual_seed(1)
np.random.seed(1)

Data Processing

# Root folder of the extracted Kaggle archive, relative to the working directory.
data_path = Path.cwd()/'Fish_Dataset/Fish_Dataset'

# Paths of all files in 'png' format below the dataset root.
# Sorted because glob order depends on the filesystem; without a fixed order
# the train/val/test split below is not reproducible across machines.
image_path = sorted(data_path.glob('**/*.png'))

# Separate Segmented from Non-Segmented Images.
# A path containing 'GT' is treated as a segmented (ground-truth) image; the
# label is the directory two levels above the file.

non_segmented_images = [img for img in image_path if 'GT' not in str(img)]
labels_non_segment = [img.parts[-3] for img in non_segmented_images]

segmented_images = [img for img in image_path if 'GT' in str(img)]
lables_segment = [img.parts[-3] for img in segmented_images]

# Sorted so that the class -> integer mapping is identical on every run; the
# iteration order of a set of strings changes between interpreter sessions.
classes = sorted(set(lables_segment))

# Convert String Labels to int

int_classes = {fish: i for i, fish in enumerate(classes)}

lables = [int_classes[lable] for lable in labels_non_segment]

image_data = pd.DataFrame({'Path': non_segmented_images,
                           'labels': lables})

# 80/20 split into (train + validation) and test ...
train, test, train_labels, test_labels = train_test_split(image_data.Path, image_data.labels, test_size=0.2, shuffle=True)

# ... followed by an 80/20 split of the remainder into train and validation.
train, val, train_labels, val_labels = train_test_split(train, train_labels, test_size=0.2, shuffle=True)

class FishDataset(Dataset):
  """
  Loads Images using pillow and applies transformations.

  Args:
    images: pandas Series of image paths; accessed positionally via ``.iloc``.
    labels: pandas Series of labels aligned with ``images``.
    transform: optional callable applied to the loaded PIL image.
  """
  def __init__(self, images, labels, transform = None):
    self.images = images
    self.labels = labels
    self.transform = transform

  def __len__(self):
    """Number of samples, taken from the label series."""
    return len(self.labels)

  def __getitem__(self, idx):
    """Return ``(image, label)`` for the sample at position ``idx``."""
    # Force three channels: PNG files may be stored as RGBA, palette or
    # greyscale, which would not match a three-channel normalisation.
    # The context manager closes the file handle once the pixels are copied.
    with Image.open(self.images.iloc[idx]) as handle:
      img = handle.convert('RGB')

    if self.transform is not None:
      img = self.transform(img)
    label = self.labels.iloc[idx]
    return img, label

Helper functions

### FISH DATASET
##########################

# Values shared by the training and evaluation pipelines.
_IMAGE_SIZE = (224, 224)
_CHANNEL_MEAN = (0.5, 0.5, 0.5)
_CHANNEL_STD = (0.5, 0.5, 0.5)

# Training pipeline: resize, convert to tensor, random horizontal/vertical
# flips, hue-only colour jitter, then per-channel normalisation.
_train_steps = [
    transforms.Resize(_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]
train_transform = transforms.Compose(_train_steps)

# Evaluation pipeline: same resize and normalisation, no random augmentation.
_eval_steps = [
    transforms.Resize(_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]
test_transforms = transforms.Compose(_eval_steps)

def get_loaders(train, train_labels, val, val_labels,test, test_labels, batch_size, num_workers, train_transform, test_transform):
  """
  Returns Train,Validation and Test Loaders.

  Args:
    train, val, test: image-path series for each split.
    train_labels, val_labels, test_labels: label series for each split.
    batch_size: batch size used by all three loaders.
    num_workers: number of worker processes used by all three loaders.
    train_transform: transform applied to training images.
    test_transform: transform applied to validation and test images.

  Returns:
    Tuple ``(train_loader, val_loader, test_loader)``. Only the training
    loader shuffles.
  """

  train_ds = FishDataset(images = train, labels = train_labels, transform = train_transform)
  # Use the transform that was passed in; reading a module-level global here
  # would silently ignore the caller's argument.
  val_ds = FishDataset(images = val, labels = val_labels, transform = test_transform)
  test_ds = FishDataset(images = test, labels = test_labels, transform = test_transform)

  loader_options = dict(batch_size=batch_size, num_workers=num_workers)

  train_loader = DataLoader(train_ds, shuffle=True, **loader_options)
  val_loader = DataLoader(val_ds, shuffle=False, **loader_options)
  test_loader = DataLoader(test_ds, shuffle=False, **loader_options)
  return train_loader, val_loader, test_loader


def set_all_seeds(seed):
  """Seed Python, NumPy and PyTorch (CPU and all CUDA devices) with ``seed``.

  The seed is also exported as the ``PL_GLOBAL_SEED`` environment variable.
  """
  os.environ["PL_GLOBAL_SEED"] = f"{seed}"
  seeders = (
      random.seed,
      np.random.seed,
      torch.manual_seed,
      torch.cuda.manual_seed_all,
  )
  for apply_seed in seeders:
    apply_seed(seed)

def compute_accuracy(model, data_loader, device):
  """Compute Accuracy for the provided Data Loader.

  Args:
    model: callable module mapping a feature batch to per-class logits.
    data_loader: iterable of ``(features, targets)`` batches.
    device: device the batches are moved to before the forward pass.

  Returns:
    Accuracy in percent as a float tensor.

  Raises:
    ValueError: if ``data_loader`` yields no examples.
  """
  # Must be *called*: a bare `model.eval` only references the method and
  # leaves dropout (and similar layers) in training mode.
  model.eval()
  correct_pred, num_examples = 0, 0
  with torch.no_grad():
    for features, targets in data_loader:
      features = features.to(device)
      targets = targets.to(device)

      logits = model(features)
      _, predicted_labels = torch.max(logits, 1)

      num_examples += targets.size(0)
      correct_pred += (predicted_labels == targets).sum()

  if num_examples == 0:
    # Without this guard `correct_pred` is still the int 0 and `.float()` fails.
    raise ValueError("`data_loader` yielded no examples")
  return correct_pred.float()/num_examples * 100


class UnNormalize:
  """Undo a per-channel normalisation: ``x * std + mean``, applied in place."""

  def __init__(self, mean, std):
    # Per-channel statistics that were used for the forward normalisation.
    self.mean = mean
    self.std = std

  def __call__(self, tensor):
    """Rescale each leading-dimension slice of ``tensor`` in place and return it."""
    channel_stats = zip(self.mean, self.std)
    for channel, (offset, scale) in zip(tensor, channel_stats):
      channel.mul_(scale)
      channel.add_(offset)
    return tensor

def plot_training_loss(minibatch_loss_list, num_epochs, iter_per_epoch,
                       results_dir=None, averaging_iterations=100):
  """Observe the Training Loss.

  Args:
    minibatch_loss_list: loss value of every training iteration.
    num_epochs: number of epochs, used for the secondary x-axis.
    iter_per_epoch: iterations per epoch, used to place the epoch ticks.
    results_dir: if given, the figure is saved there as
      'plot_training_loss.pdf'.
    averaging_iterations: window length of the running average.
  """

  plt.figure()
  ax1 = plt.subplot(1, 1, 1)
  ax1.plot(range(len(minibatch_loss_list)),(minibatch_loss_list), label='Minibatch Loss')

  # Only the y-limit depends on having more than 1000 iterations (it ignores
  # the first 1000 losses). Everything below must run for short trainings too.
  if len(minibatch_loss_list) > 1000:
    ax1.set_ylim([0, np.max(minibatch_loss_list[1000:])*1.5])
  ax1.set_xlabel('Iterations')
  ax1.set_ylabel('Loss')

  # A 'valid' convolution needs at least one full window of loss values.
  if 0 < averaging_iterations <= len(minibatch_loss_list):
    ax1.plot(np.convolve(minibatch_loss_list,
                         np.ones(averaging_iterations,)/averaging_iterations,
                         mode='valid'),
             label='Running Average')
  ax1.legend()

  ###################
  # Set second x-axis
  ###################
  ax2 = ax1.twiny()
  newlabel = list(range(num_epochs+1))

  newpos = [e*iter_per_epoch for e in newlabel]

  # Label every 10th epoch.
  ax2.set_xticks(newpos[::10])
  ax2.set_xticklabels(newlabel[::10])

  ax2.xaxis.set_ticks_position('bottom')
  ax2.xaxis.set_label_position('bottom')
  ax2.spines['bottom'].set_position(('outward', 45))
  ax2.set_xlabel('Epochs')
  ax2.set_xlim(ax1.get_xlim())
  ###################

  plt.tight_layout()

  if results_dir is not None:
    out_path = os.path.join(results_dir, 'plot_training_loss.pdf')
    plt.savefig(out_path)


def plot_accuracy(train_acc_list, valid_acc_list, results_dir):
    """Plot per-epoch training and validation accuracy on the current figure.

    Epochs are numbered from 1 and their count is taken from
    ``train_acc_list``. When ``results_dir`` is not ``None`` the figure is
    saved there as 'plot_acc_training_validation.pdf'.
    """
    curves = (
        (train_acc_list, 'Training'),
        (valid_acc_list, 'Validation'),
    )
    last_epoch = len(train_acc_list)
    for accuracies, legend_name in curves:
        plt.plot(np.arange(1, last_epoch + 1), accuracies, label=legend_name)

    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout()

    if results_dir is None:
        return
    target_file = os.path.join(results_dir, 'plot_acc_training_validation.pdf')
    plt.savefig(target_file)


def show_examples(model, data_loader, unnormalizer=None, class_dict=None):
  """Visualize the predictions for the first batch of ``data_loader``.

  Up to 15 images are drawn on a 3x5 grid, each titled with the predicted
  ('P') and true ('T') label.

  Args:
    model: callable mapping a feature batch to per-class logits.
    data_loader: iterable of ``(features, targets)`` batches; only the first
      batch is used.
    unnormalizer: optional callable applied to each image before display.
    class_dict: optional mapping from integer label to display name.

  Raises:
    ValueError: if ``data_loader`` yields no batch.
  """
  first_batch = next(iter(data_loader), None)
  if first_batch is None:
    raise ValueError("`data_loader` yielded no batches")
  features, targets = first_batch

  # Run on the device the model lives on and with dropout disabled, then
  # restore the model's previous training/eval mode.
  inputs = features
  was_training = None
  if isinstance(model, torch.nn.Module):
    first_param = next(model.parameters(), None)
    if first_param is not None:
      inputs = features.to(first_param.device)
    was_training = model.training
    model.eval()
  try:
    with torch.no_grad():
      logits = model(inputs)
      predictions = torch.argmax(logits, dim=1).cpu()
  finally:
    if was_training is not None:
      model.train(was_training)

  # Work on a CPU copy so the caller's batch is not modified.
  images = features.detach().cpu().clone()
  if unnormalizer is not None:
    for idx in range(images.shape[0]):
      images[idx] = unnormalizer(images[idx])
  # NCHW -> NHWC, the channel-last layout expected by imshow.
  nhwc_img = images.permute(0, 2, 3, 1).numpy()

  is_grayscale = nhwc_img.shape[-1] == 1
  if is_grayscale:
    nhwc_img = np.squeeze(nhwc_img, axis=3)

  fig, axes = plt.subplots(nrows=3, ncols=5,
                           sharex=True, sharey=True)

  for idx, ax in enumerate(axes.ravel()):
    # Hide the axis frame on every panel, including unused ones.
    ax.axison = False
    if idx >= len(nhwc_img):
      # Batch holds fewer images than the grid has panels.
      continue

    if is_grayscale:
      ax.imshow(nhwc_img[idx], cmap='binary')
    else:
      ax.imshow(nhwc_img[idx])

    if class_dict is not None:
      ax.title.set_text(f'P: {class_dict[predictions[idx].item()]}'
                        f'\nT: {class_dict[targets[idx].item()]}')
    else:
      ax.title.set_text(f'P: {predictions[idx]} | T: {targets[idx]}')
  plt.tight_layout()
  plt.show()

### SETTINGS
##########################

# Reproducibility and training hyper-parameters.
RANDOM_SEED = 123
BATCH_SIZE = 64
NUM_EPOCHS = 10
WORKERS = 2

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
  DEVICE = torch.device('cuda:0')
else:
  DEVICE = torch.device('cpu')

set_all_seeds(RANDOM_SEED)

# Build the three loaders from the splits created during data processing.
train_loader, val_loader, test_loader = get_loaders(
    train=train,
    train_labels=train_labels,
    val=val,
    val_labels=val_labels,
    test=test,
    test_labels=test_labels,
    batch_size=BATCH_SIZE,
    num_workers=WORKERS,
    train_transform=train_transform,
    test_transform=test_transforms,
)

AlexNet

One of the most groundbreaking events in the DL community was the advent of AlexNet, which was head and shoulders above every other solution submitted to the ImageNet 2012 Challenge.

AlexNet is a CNN variant whose success in the ImageNet 2012 Challenge showcased the ability of neural networks to beat manually crafted models.

Architecture

Comparison of the architectures of LeNet and AlexNet by CMG Lee using data from http://d2l.ai/chapter_convolutional-neural-networks/lenet.html and http://d2l.ai/chapter_convolutional-modern/alexnet.html .

One can observe that while LeNet-5 is deep with 5 layers (2 convolutional and 3 fully connected layers), AlexNet is much deeper with 8 layers (5 convolutional layers and 3 fully connected ones). From an architectural point of view, the two networks are otherwise similar.
AlexNet uses ReLU as its activation function, whereas LeNet used sigmoid.
In LeNet the stride was the same size as the kernel, whereas AlexNet uses a stride smaller than its kernel (e.g. 3 x 3 pooling windows with a stride of 2).
The AlexNet architecture also employs dropout as a regularization technique, whereas LeNet relied on weight decay.

class AlexNet(nn.Module):
  """
  AlexNet as described in
  "ImageNet Classification with Deep Convolutional Neural Networks" by Alex Krizhevsky et al.
  See: https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf

  All modules live in a single ``nn.Sequential`` stored as ``self.layers``.
  The first fully-connected layer expects 6400 flattened features.
  """
  def __init__(self, num_classes):
    super().__init__()
    self.num_classes = num_classes

    # Convolutional part. Modules are created in network order.
    feature_extractor = [
        # Large 11 x 11 window with stride 4: captures big structures and
        # sharply reduces the spatial size of the output.
        nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        # Smaller 5 x 5 window; padding 2 keeps height and width unchanged
        # while the channel count grows.
        nn.Conv2d(96, 256, kernel_size=5, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        # Three successive 3 x 3 convolutions with no pooling in between;
        # only the last one reduces the channel count again.
        nn.Conv2d(256, 384, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(384, 384, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(384, 256, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Flatten(),
    ]

    # Fully-connected part: two wide hidden layers, each followed by
    # dropout to mitigate overfitting, then the output layer.
    classifier = [
        nn.Linear(6400, 4096),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        # One logit per class (`num_classes` instead of the 1000 ImageNet
        # classes used in the paper).
        nn.Linear(4096, num_classes),
    ]

    self.layers = nn.Sequential(*feature_extractor, *classifier)

  def forward(self,x):
    """Return the raw class logits for the input batch ``x``."""
    return self.layers(x)

Observing the Network

A tri-channel example datapoint is created to observe the shape of the output layer.

# Dummy batch holding a single three-channel 224 x 224 image.
X = torch.randn(1, 3, 224, 224)

net = AlexNet(num_classes=9)
# Feed the batch through the network one module at a time and report the
# shape after every step.
for layer in net.layers.children():
  X = layer(X)
  layer_name = type(layer).__name__
  print(layer_name, f"\t output shape: {X.shape}")

Conv2d 	 output shape: torch.Size([1, 96, 54, 54])
ReLU 	 output shape: torch.Size([1, 96, 54, 54])
MaxPool2d 	 output shape: torch.Size([1, 96, 26, 26])
Conv2d 	 output shape: torch.Size([1, 256, 26, 26])
ReLU 	 output shape: torch.Size([1, 256, 26, 26])
MaxPool2d 	 output shape: torch.Size([1, 256, 12, 12])
Conv2d 	 output shape: torch.Size([1, 384, 12, 12])
ReLU 	 output shape: torch.Size([1, 384, 12, 12])
Conv2d 	 output shape: torch.Size([1, 384, 12, 12])
ReLU 	 output shape: torch.Size([1, 384, 12, 12])
Conv2d 	 output shape: torch.Size([1, 256, 12, 12])
ReLU 	 output shape: torch.Size([1, 256, 12, 12])
MaxPool2d 	 output shape: torch.Size([1, 256, 5, 5])
Flatten 	 output shape: torch.Size([1, 6400])
Linear 	 output shape: torch.Size([1, 4096])
ReLU 	 output shape: torch.Size([1, 4096])
Dropout 	 output shape: torch.Size([1, 4096])
Linear 	 output shape: torch.Size([1, 4096])
ReLU 	 output shape: torch.Size([1, 4096])
Dropout 	 output shape: torch.Size([1, 4096])
Linear 	 output shape: torch.Size([1, 9])

Setup

# Build the nine-class network and move it to the selected device.
model = AlexNet(num_classes=9).to(DEVICE)

# Optimizer: SGD with momentum.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Scheduler: multiply the learning rate by `factor` once the monitored
# metric plateaus; mode='max' means the metric is expected to increase.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.1,
    verbose=True,
)

Training

# Print the minibatch loss every `logging_interval` batches.
logging_interval = 50
# Metric passed to the LR scheduler after each epoch. The scheduler is created
# with mode='max', so it must monitor a quantity that should increase
# (validation accuracy), not the minibatch loss.
scheduler_on = 'valid_acc'
start_time = time.time()

minibatch_loss_list, train_acc_list, valid_acc_list = [], [], []

for epoch in range(NUM_EPOCHS):
  # Start Training
  model.train()
  for batch_idx, (features, targets) in enumerate(train_loader):
    features = features.to(DEVICE)
    targets = targets.to(DEVICE)
    # Forward and BackPropagation
    logits = model(features)
    loss = F.cross_entropy(logits, targets)
    optimizer.zero_grad()
    loss.backward()

    # Update Model Parameters
    optimizer.step()

    ## LOGGING
    minibatch_loss_list.append(loss.item())
    if not batch_idx % logging_interval:
      print(f"Epoch = {epoch+1:03d}/{NUM_EPOCHS:03d}"
            f"| Batch {batch_idx:04d}/{len(train_loader):04d}"
            f"| Loss: {loss:.4f}")

  ## Validation
  model.eval()
  with torch.no_grad():
    # Stored as plain floats so the lists can be plotted even when the
    # accuracies were computed on a CUDA device.
    train_acc = float(compute_accuracy(model, train_loader, DEVICE))
    valid_acc = float(compute_accuracy(model, val_loader, DEVICE))
    print(f'Epoch: {epoch+1}/{NUM_EPOCHS:03d} '
          f'| Train: {train_acc:.2f}% '
          f'| Validation: {valid_acc:.2f}%')
    train_acc_list.append(train_acc)
    valid_acc_list.append(valid_acc)

  elapsed = (time.time() - start_time)/60
  print(f'Time elapsed: {elapsed:.2f} min')

  if scheduler is not None:
    # `elif` is required: with two independent `if`s the 'valid_acc' choice
    # fell through to the `else` of the second test and raised.
    if scheduler_on == 'valid_acc':
      scheduler.step(valid_acc_list[-1])
    elif scheduler_on == 'minibatch_loss':
      scheduler.step(minibatch_loss_list[-1])
    else:
      raise ValueError("Invalid `scheduler_on` choice")

total_elapsed = (time.time() - start_time)/60
print(f'Total Training Time: {total_elapsed:.2f} min')

# Compute Test Accuracy

# Make sure dropout is disabled even if no epoch was run.
model.eval()
test_acc = compute_accuracy(model, test_loader, device=DEVICE)

print(f"Test accuracy: {test_acc:0.3f}")

Epoch = 001/010| Batch 0000/0090| Loss: 2.1966
Epoch = 001/010| Batch 0050/0090| Loss: 2.1958
Epoch: 1/010 | Train: 11.25% | Validation: 10.07%
Time elapsed: 3.17 min
Epoch = 002/010| Batch 0000/0090| Loss: 2.1973
Epoch = 002/010| Batch 0050/0090| Loss: 2.1971
Epoch: 2/010 | Train: 11.25% | Validation: 10.07%
Time elapsed: 6.35 min
Epoch = 003/010| Batch 0000/0090| Loss: 2.1957
Epoch = 003/010| Batch 0050/0090| Loss: 2.1996
Epoch: 3/010 | Train: 13.82% | Validation: 13.40%
Time elapsed: 9.57 min
Epoch = 004/010| Batch 0000/0090| Loss: 2.1974
Epoch = 004/010| Batch 0050/0090| Loss: 2.1993
Epoch: 4/010 | Train: 20.57% | Validation: 17.99%
Time elapsed: 12.76 min
Epoch = 005/010| Batch 0000/0090| Loss: 2.1964
Epoch = 005/010| Batch 0050/0090| Loss: 2.1964
Epoch: 5/010 | Train: 15.82% | Validation: 14.65%
Time elapsed: 15.92 min
Epoch = 006/010| Batch 0000/0090| Loss: 2.1969
Epoch = 006/010| Batch 0050/0090| Loss: 2.1958
Epoch: 6/010 | Train: 11.56% | Validation: 9.44%
Time elapsed: 19.13 min
Epoch = 007/010| Batch 0000/0090| Loss: 2.1962
Epoch = 007/010| Batch 0050/0090| Loss: 2.2024
Epoch: 7/010 | Train: 11.56% | Validation: 9.44%
Time elapsed: 22.32 min
Epoch = 008/010| Batch 0000/0090| Loss: 2.1971
Epoch = 008/010| Batch 0050/0090| Loss: 2.1966
Epoch: 8/010 | Train: 11.56% | Validation: 9.44%
Time elapsed: 25.55 min
Epoch = 009/010| Batch 0000/0090| Loss: 2.1971
Epoch = 009/010| Batch 0050/0090| Loss: 2.1996
Epoch: 9/010 | Train: 11.56% | Validation: 9.44%
Time elapsed: 28.82 min
Epoch = 010/010| Batch 0000/0090| Loss: 2.1947
Epoch = 010/010| Batch 0050/0090| Loss: 2.1927
Epoch: 10/010 | Train: 11.56% | Validation: 9.44%
Time elapsed: 32.02 min
Total Training Time: 1921.41 min
Test accuracy: 11.000

We covered the steps for constructing a complete DL pipeline, i.e., from fetching the data to using the model for prediction on unseen data.
In short, the test accuracy is 11%, which is chance level for nine classes (1/9 ≈ 11.1%); the loss also stays at about 2.197 ≈ ln 9 throughout training.
Training time is approximately 32 minutes.
Well, these are not great results on our custom dataset. Nonetheless, AlexNet was specifically designed for the ImageNet challenge.
It is one of the very first examples of deep neural networks, after which we saw how important architecture is in increasing the accuracy of the predictions.

The Fish and the (Alex)Net