import os
import time
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from collections import OrderedDict
import PIL
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dataset_location = '/home/marcin/Datasets/udacity-challange-flower-data/flower_data/'
This section does the following: defines the image transforms, builds the dataloaders, and extracts CNN features for the train/valid/test splits.
You need to re-run this section only if you change the transforms or the feature-extractor network; otherwise load the saved features from the checkpoint further down.
imgnet_mean, imgnet_std = np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225])
# Option 'v2'
transforms_train = transforms.Compose([
transforms.Resize(256),
transforms.Pad(100, padding_mode='reflect'),
transforms.RandomAffine(degrees=90, translate=(.2, .2), shear=30, resample=PIL.Image.BILINEAR),
transforms.CenterCrop(256),
transforms.RandomResizedCrop(224, scale=(0.1 , 2.)),
transforms.RandomHorizontalFlip(),
transforms.RandomVerticalFlip(),
transforms.ColorJitter(brightness=.2, contrast=.2, saturation=.2, hue=.1),
transforms.ToTensor(),
    transforms.Normalize(imgnet_mean, imgnet_std)
])
# # Option 'v1'
# transforms_train = transforms.Compose([
# transforms.Resize(256),
# transforms.Pad(100, padding_mode='reflect'),
# transforms.RandomRotation(45),
# transforms.CenterCrop(256),
# transforms.RandomResizedCrop(224, scale=(0.8 , 1.0)),
# transforms.RandomHorizontalFlip(),
# transforms.ToTensor(),
# transforms.Normalize([0.485, 0.456, 0.406],
# [0.229, 0.224, 0.225])
# ])
# Option 'Tomasz'
# transforms_train = transforms.Compose([
# transforms.RandomRotation(30),
# transforms.RandomResizedCrop(224, scale=(0.08 , 1.0)),
# transforms.RandomHorizontalFlip(),
# transforms.ToTensor(),
# transforms.Normalize([0.485, 0.456, 0.406],
# [0.229, 0.224, 0.225])
# ])
transforms_valid = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
    transforms.Normalize(imgnet_mean, imgnet_std)
])
Test transforms
def tensor_img_2_numpy(tensor_img):
    """Undo the ImageNet normalization and convert a CHW tensor back to a PIL image."""
    ttt = transforms.functional.normalize(tensor_img, -imgnet_mean/imgnet_std, 1/imgnet_std)
    return transforms.functional.to_pil_image(ttt)
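The normalize call above inverts the ImageNet normalization analytically: Normalize computes (x - m)/s, so applying it again with mean = -m/s and std = 1/s recovers x. A quick standalone check of that identity (a throwaway sketch, not part of the pipeline):
# ((x - m)/s - (-m/s)) / (1/s) = x
x = torch.rand(3, 8, 8)
m = torch.tensor(imgnet_mean, dtype=torch.float32).view(3, 1, 1)
s = torch.tensor(imgnet_std, dtype=torch.float32).view(3, 1, 1)
assert torch.allclose(((x - m) / s + m / s) * s, x, atol=1e-6)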
img = PIL.Image.open(os.path.join(dataset_location, 'train/1/image_06734.jpg'))
fig, axes = plt.subplots(ncols=6, figsize=[16,4])
axes[0].set_title('Original')
axes[0].imshow(img)
axes[0].axis('off')
axes[1].set_title('Valid/Test')
tensor_img = transforms_valid(img)
axes[1].imshow(tensor_img_2_numpy(tensor_img))
axes[1].axis('off')
for i in range(2, len(axes)):
axes[i].set_title(f'Train #{i-2}')
tensor_img = transforms_train(img)
axes[i].imshow(tensor_img_2_numpy(tensor_img))
axes[i].axis('off')
Create Dataloaders
dataset_train = datasets.ImageFolder(os.path.join(dataset_location, 'train'), transforms_train)
dataset_valid = datasets.ImageFolder(os.path.join(dataset_location, 'valid'), transforms_valid)
dataset_test = datasets.ImageFolder(os.path.join(dataset_location, 'test'), transforms_valid)
print('Number of train images:', len(dataset_train))
print('Number of valid images:', len(dataset_valid))
print('Number of test images:', len(dataset_test))
dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size=16, shuffle=True,
num_workers=6, pin_memory=True)
# shuffle=True is harmless for valid/test here: features and labels are extracted in
# pairs below, so their relative order is preserved
dataloader_valid = torch.utils.data.DataLoader(dataset_valid, batch_size=16, shuffle=True,
                                               num_workers=6, pin_memory=True)
dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size=16, shuffle=True,
                                              num_workers=6, pin_memory=True)
Sanity check
imgs, lbls = next(iter(dataloader_train))
print(imgs.shape)
print(lbls.shape)
fig, axes = plt.subplots(ncols=8, figsize=[16,4])
for i in range(len(axes)):
tensor_img = imgs[i]
axes[i].imshow(tensor_img_2_numpy(tensor_img))
axes[i].axis('off')
class Passthrough(torch.nn.Module):
    """Identity module used to strip the classifier head off a pretrained CNN."""
    def forward(self, input):
        return input
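Recent PyTorch releases (1.1 and later) ship an equivalent built-in, so the same effect could be had with:
# nn.Identity forwards its input unchanged, like Passthrough above
# model_cnn.classifier = nn.Identity()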
#model_cnn = models.densenet121(pretrained=True)
#model_cnn = models.resnet50(pretrained=True)
#model_cnn = models.resnet152(pretrained=True)
#model_cnn = models.inception_v3(pretrained=True)
model_cnn = models.densenet201(pretrained=True)
# disable all gradients
for param in model_cnn.parameters():
param.requires_grad = False
model_cnn.classifier = Passthrough()
model_cnn.to(device);
def extract_features(model_cnn, dataloader, num_epochs=1):
"""
Params:
model_cnn - big convnet with final dense layers removed
dataloader - we get data from here
"""
features_list = []
labels_list = []
model_cnn.eval()
for epoch in range(num_epochs):
print(f'Processing epoch: {epoch:3d}', end='')
time_start = time.time()
for images, labels in dataloader:
images = images.to(device)
outputs = model_cnn(images)
features_list.append(outputs.cpu().numpy())
labels_list.append(labels.numpy())
time_interval = time.time() - time_start
        print(f' time: {time_interval:5.2f}s')
features_arr = np.concatenate(features_list)
labels_arr = np.concatenate(labels_list)
return features_arr, labels_arr
# 30 passes over the randomly augmented train set -> ~30 augmented variants of each image
train_features, train_labels = extract_features(model_cnn, dataloader_train, num_epochs=30)
print('Shape of train features (inputs):', train_features.shape)
print('Shape of train labels (targets): ', train_labels.shape)
print('Type of train feat. and labels: ', train_features.dtype, train_labels.dtype)
print('Sample of labels:', train_labels[:20])
valid_features, valid_labels = extract_features(model_cnn, dataloader_valid)
print('Shape of validation features (inputs):', valid_features.shape)
print('Shape of validation labels (targets): ', valid_labels.shape)
print('Type of valid. features and labels: ', valid_features.dtype, valid_labels.dtype)
print('Sample of labels:', valid_labels[:20])
test_features, test_labels = extract_features(model_cnn, dataloader_test)
print('Shape of test features (inputs):', test_features.shape)
print('Shape of test labels (targets): ', test_labels.shape)
print('Type of test. features and labels: ', test_features.dtype, test_labels.dtype)
print('Sample of labels:', test_labels[:20])
Save Checkpoint
dataset_npz = os.path.join(dataset_location, 'dataset_densenet201_aug_v2_30x.npz')
dataset_npz
# Uncomment if you really want to save; this will overwrite an existing file
# np.savez(dataset_npz,
# train_features=train_features,
# train_labels=train_labels,
# valid_features=valid_features,
# valid_labels=valid_labels,
# test_features=test_features,
# test_labels=test_labels)
Load Dataset
# Point this at whichever previously saved feature set you want to train on
dataset_npz = os.path.join(dataset_location, 'dataset_densenet201_augtom.npz')
dataset_npz
npzfile = np.load(dataset_npz)
train_features = npzfile['train_features']
train_labels = npzfile['train_labels']
valid_features = npzfile['valid_features']
valid_labels = npzfile['valid_labels']
test_features = npzfile['test_features']
test_labels = npzfile['test_labels']
print('Shape of train features (inputs):', train_features.shape)
print('Shape of train labels (targets): ', train_labels.shape)
print('Type of train feat. and labels: ', train_features.dtype, train_labels.dtype)
print('Sample of labels:', train_labels[:20])
print('Shape of validation features (inputs):', valid_features.shape)
print('Shape of validation labels (targets): ', valid_labels.shape)
print('Type of valid. features and labels: ', valid_features.dtype, valid_labels.dtype)
print('Sample of labels:', valid_labels[:20])
print('Shape of test features (inputs):', test_features.shape)
print('Shape of test labels (targets): ', test_labels.shape)
print('Type of test. features and labels: ', test_features.dtype, test_labels.dtype)
print('Sample of labels:', test_labels[:20])
Load dataset to GPU
x_train = torch.tensor(train_features).to(device)
y_train = torch.tensor(train_labels).to(device)
x_valid = torch.tensor(valid_features).to(device)
y_valid = torch.tensor(valid_labels).to(device)
x_test = torch.tensor(test_features).to(device)
y_test = torch.tensor(test_labels).to(device)
Define Model
model = nn.Sequential(OrderedDict([
('bn1', nn.BatchNorm1d(1920)),
('fc1', nn.Linear(1920, 512)),
('elu1', nn.ELU()),
('drp1', nn.Dropout(0.75)),
# ('fc2', nn.Linear(512, 512)),
# ('elu2', nn.ELU()),
# ('drp2', nn.Dropout(0.75)),
('fcf', nn.Linear(512, 102)),
]))
model.to(device)
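As a quick sanity check (a throwaway sketch), the head has about 1.04M trainable parameters: 1920·2 for bn1, 1920·512 + 512 for fc1, and 512·102 + 102 for fcf.
# bn1: 3,840  fc1: 983,552  fcf: 52,326  -> total 1,039,718
print(sum(p.numel() for p in model.parameters() if p.requires_grad))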
criterion = nn.CrossEntropyLoss() # nn.NLLLoss()
#optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.00003)
# optional, e.g.: scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
hist = { 'tloss':[], 'tacc':[], # mini-batch loss/acc every iteration
'train_loss':[], 'train_acc':[], # train set loss/acc every epoch
'valid_loss':[], 'valid_acc':[] } # valid set loss/acc every epoch
Helper
def accuracy(logits, labels):
predictions = torch.argmax(logits, dim=1)
    return (predictions == labels).float().mean()  # returns a 0-dim tensor; call .item() for a float
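For example (toy logits for 3 samples over 4 classes; the first two rows are predicted correctly, the last is not):
logits = torch.tensor([[0.1, 2.0, 0.3, 0.0],
                       [1.5, 0.2, 0.1, 0.0],
                       [0.0, 0.1, 0.2, 3.0]])
labels = torch.tensor([1, 0, 2])
print(accuracy(logits, labels))  # tensor(0.6667)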
Train model
num_epochs = 100
batch_size = 250
train_size = len(x_train)
train_start_time = time.time()
for epoch in range(num_epochs):
epoch_time_start = time.time()
### Train ###
model.train()
indices = torch.randperm(len(x_train), device=device) # indices = [2423, 1563, 4854, ...]
for i in range(0, len(x_train), batch_size): # i = 0, batch_size, 2*batch_size, ...
# Pick mini-batch
x = x_train[indices[i:i+batch_size]]
y = y_train[indices[i:i+batch_size]]
# Optimize
optimizer.zero_grad()
outputs = model(x) # logits
loss = criterion(outputs, y)
loss.backward()
optimizer.step()
# Record per-iteration stats
with torch.no_grad():
acc = accuracy(outputs, y)
hist['tacc'].append( acc.item() )
hist['tloss'].append( loss.item() )
### Evaluate ###
model.eval()
with torch.no_grad():
# Eval on train set
outputs = model(x_train) # pass-in whole train dataset at once
loss = criterion(outputs, y_train)
acc = accuracy(outputs, y_train)
hist['train_acc'].append( acc.item() )
hist['train_loss'].append( loss.item() )
# Eval on valid set
outputs = model(x_valid)
loss = criterion(outputs, y_valid)
acc = accuracy(outputs, y_valid)
hist['valid_acc'].append( acc.item() )
hist['valid_loss'].append( loss.item() )
epoch_time_interval = time.time() - epoch_time_start
### Print Summary ###
if epoch == 0:
print(' (time ) ep loss / acc loss / acc')
    print(f'Epoch ({epoch_time_interval:4.2f}s): {epoch:3}'
f' Train: {hist["train_loss"][-1]:6.4f} / {hist["train_acc"][-1]:6.4f}'
f' Valid: {hist["valid_loss"][-1]:6.4f} / {hist["valid_acc"][-1]:6.4f}')
print()
total_train_time = time.time() - train_start_time
print(f'Total train time {total_train_time:6.2f}s')
Evaluate on all datasets (sic)
for g in optimizer.param_groups:
print(g['lr'])
for g in optimizer.param_groups:
g['lr'] = 0.00001
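Hand-editing param_groups works, but a scheduler could automate the decay (a sketch, not what was run here):
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=1/3, patience=10)
# ... then call scheduler.step(valid_loss) once per epoch inside the training loop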
### Evaluate ###
model.eval()
with torch.no_grad():
# Eval on train set
outputs = model(x_train) # pass-in whole train dataset at once
train_loss = criterion(outputs, y_train).item()
train_acc = accuracy(outputs, y_train).item()
# Eval on valid set
outputs = model(x_valid)
valid_loss = criterion(outputs, y_valid).item()
valid_acc = accuracy(outputs, y_valid).item()
    # Eval on test set
outputs = model(x_test)
test_loss = criterion(outputs, y_test).item()
test_acc = accuracy(outputs, y_test).item()
all_loss = train_loss*len(x_train) + valid_loss*len(x_valid) + test_loss*len(x_test)
all_loss /= len(x_train)+len(x_valid)+len(x_test)
all_acc = train_acc*len(x_train) + valid_acc*len(x_valid) + test_acc*len(x_test)
all_acc /= len(x_train)+len(x_valid)+len(x_test)
print( ' loss / acc')
print(f'Train: {train_loss:6.4f} / {train_acc:4.4f}')
print(f'Valid: {valid_loss:6.4f} / {valid_acc:4.4f}')
print(f'Test: {test_loss:6.4f} / {test_acc:4.4f}')
print(f'All: {all_loss:6.4f} / {all_acc:4.4f}')
def pretty_plot(ax, data, label, color, alpha):
def smooth(y, n):
return np.convolve(y, v=np.ones(n)/n, mode='same')
#ax.scatter(range(len(data)), data, marker='.', s=1, color=color, alpha=alpha/5)
ax.plot(smooth(data, 55), label=label, color=color, alpha=alpha)
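smooth is a centered moving average (convolution with a box kernel); mode='same' keeps the length but zero-padding damps the endpoints. For instance:
# window of 3: each point averaged with its neighbours, zero-padded at the edges
y = np.array([0., 3., 0., 3., 0.])
print(np.convolve(y, np.ones(3) / 3, mode='same'))  # [1. 1. 2. 1. 1.]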
def plot_hist(hist, title):
fig, (ax, ax2) = plt.subplots(nrows=1, ncols=2, figsize=[16,3])
fig.suptitle(title, fontsize=16)
#ax.plot(hist['train_loss'], label='train_loss', color='blue')
pretty_plot(ax.twiny(), hist['tloss'], 'tloss', color='blue', alpha=.5)
ax.plot(hist['valid_loss'], label='valid_loss', color='orange')
ax.set_title('Loss'); ax.legend(); ax.grid(); ax.set_ylim([0, 1]);
#fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[16,3])
ax2.plot(hist['train_acc'], label='train_acc', color='blue')
#pretty_plot(ax2.twiny(), hist['tacc'], 'tacc', color='blue', alpha=1)
ax2.plot(hist['valid_acc'], label='valid_acc', color='orange')
ax2.set_title('Accuracy'); ax2.legend(); ax2.grid(); ax2.set_ylim([.8, 1]);
plt.tight_layout()
plot_hist(hist, title='Test')
model_pth = os.path.join(dataset_location, 'model.pth')
model_pth
# Temporarily attach the trained head so the full network is saved in one state_dict
model_cnn.classifier = model
torch.save({'arch': 'densenet201',
            'state_dict': model_cnn.state_dict(),
            'class_to_idx': dataset_train.class_to_idx},
           model_pth)
# Detach the head again so the feature extractor keeps returning raw features
model_cnn.classifier = Passthrough()
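For completeness, a minimal restore sketch (assumes the same head architecture as defined above; the head must be rebuilt before loading the state_dict):
checkpoint = torch.load(model_pth, map_location=device)
model_restored = models.densenet201(pretrained=False)
model_restored.classifier = nn.Sequential(OrderedDict([
    ('bn1', nn.BatchNorm1d(1920)),
    ('fc1', nn.Linear(1920, 512)),
    ('elu1', nn.ELU()),
    ('drp1', nn.Dropout(0.75)),
    ('fcf', nn.Linear(512, 102)),
]))
model_restored.load_state_dict(checkpoint['state_dict'])
class_to_idx = checkpoint['class_to_idx']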
The in_features of fc1 (1920) was set to match the output size of the chosen CNN (DenseNet-201).
print(model)
plot_hist(hist, title='Inception-v3 no-aug')
plot_hist(hist, title='ResNet-152 no-aug')
plot_hist(hist, title='ResNet50 no-aug')
plot_hist(hist, title='DenseNet121 no-aug')
plot_hist(hist, title='DenseNet-201 no-aug')