Machine Learning Workflow ✅

import torch
import torchvision
import matplotlib.pyplot as pl
import numpy as np

1 Dataset

A dataset is a large collection of input-output pairs: \(\{(x_i, y_i): i\in D\}\). Each pair \((x, y)\) is called a sample.

The dataset is to be used for both training and testing of a machine learning system.
For neural network based training, we have to encode everything as tensors. Thus, each sample must be transformed into some tensor of real numbers.
For numerical reasons, we want to rescale the numerical values so that they lie close to \([-1, 1]\).

1.1 PyTorch datasets

PyTorch provides a set of standard datasets for educational and research purposes.

from torchvision import transforms

# Convert each image to a tensor; per the torchvision docs, ToTensor also
# rescales uint8 pixel values from [0, 255] to floats in [0.0, 1.0].
transform = transforms.Compose([
    transforms.ToTensor(),
])

# Training split of MNIST, downloaded into the root directory if it is missing.
# Uses the public `torchvision.datasets.MNIST` path, the same one the test split
# is loaded through in the evaluation section. `train=True` is the library
# default and is spelled out to make the split explicit.
dataset = torchvision.datasets.MNIST(
    '/workspace/datasets/', train=True, download=True, transform=transform
)

1.2 The MNIST dataset

#
# Raw stored inputs (before `transform`): one 28x28 grid of uint8 values per sample.
#
dataset.data.shape, dataset.data.dtype

(torch.Size([60000, 28, 28]), torch.uint8)

#
# Targets: a single int64 label per sample.
#
dataset.targets.shape, dataset.targets.dtype

(torch.Size([60000]), torch.int64)

Let’s inspect the data as an image.

# Indexing the dataset yields one (image, label) pair.
x, y = dataset[0]
# Cell value: the image tensor's shape and the Python type of the label.
x.shape, type(y)

(torch.Size([1, 28, 28]), int)

# Drop the size-1 leading dimension in place so imshow receives a 2-D image.
x.squeeze_()
pl.imshow(x, cmap='gray')
# Hide the axis ticks; they carry no information for an image.
pl.xticks([]); pl.yticks([])
# The trailing semicolon suppresses the notebook's echo of the call's return value.
pl.title(f'Label = {y}');

# Preview the first 12 samples as a 3x4 grid of grayscale images.
fig = pl.figure(figsize=(6,4))

for i in range(12):
    x, y = dataset[i]
    ax = fig.add_subplot(3, 4, i + 1)
    # Square pixels and no ticks: only the image itself is of interest.
    ax.set(aspect='equal', xticks=[], yticks=[])
    ax.imshow(x.squeeze(), cmap='gray')

2 Data loader

A data loader is the stage of the machine learning pipeline that feeds data into the neural network. It has two jobs:

Shuffle dataset samples into pseudo random positions to avoid bad gradient estimation.
Split dataset samples into small batches during stochastic gradient descent training.

PyTorch provides a helper class to perform data loading.

# Serve the training set in shuffled mini-batches of 36 samples
# (36 = 6x6, which is how the batch is tiled when it is plotted below).
dataloader = torch.utils.data.DataLoader(dataset, batch_size=36, shuffle=True)

# Pull a single mini-batch from the loader to look at its shape.
xs, ys = next(iter(dataloader))
xs.shape, ys.shape

(torch.Size([36, 1, 28, 28]), torch.Size([36]))

# Tile the batch into one image with 6 samples per row.
im = torchvision.utils.make_grid(xs, nrow=6)
# make_grid returns a channels-first (C, H, W) tensor, while imshow expects
# channels last (H, W, C). Reorder the axes with the tensor's own `permute`
# rather than passing a torch tensor through `np.transpose`, which only works
# via NumPy's implicit array conversion.
pl.imshow(im.permute(1, 2, 0))
pl.xticks([])
pl.yticks([]);

3 Neural Network Model

Now we can build the model and optimizer for training.

3.1 An MLP model

from torch import nn

class MyModel(nn.Module):
    """Two-layer perceptron mapping a 28x28 input to 10 output scores.

    The layers are held in ``self.net`` (an ``nn.Sequential``) in the order
    Flatten -> Linear(784, 100) -> ReLU -> Linear(100, 10).
    """

    def __init__(self):
        super().__init__()
        n_inputs, n_hidden, n_outputs = 28 * 28, 100, 10
        layers = [
            nn.Flatten(),
            nn.Linear(n_inputs, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_outputs),
        ]
        # Positional construction keeps the state-dict keys as net.<index>.*
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the output of ``self.net`` applied to ``x``."""
        scores = self.net(x)
        return scores

# The network instance that is trained, evaluated and saved in the sections below.
model = MyModel()

3.2 The loss function

# Cross-entropy between the model's 10 raw output scores and the integer label.
# nn.CrossEntropyLoss takes unnormalised scores, so the model has no softmax layer.
loss_fn = nn.CrossEntropyLoss()

3.3 Optimizer

# Adam over all of the model's parameters; no hyperparameters are overridden,
# so the library defaults apply.
optimizer = torch.optim.Adam(model.parameters())

4 Training

def train(model, optimizer, loss_fn, epochs):
    """Train ``model`` on the module-level ``dataloader`` for ``epochs`` passes.

    Args:
        model: Network to optimise; it is updated in place.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called
            once per batch.
        loss_fn: Callable ``loss_fn(y_out, y_true)`` returning a scalar loss
            tensor that supports ``backward()``.
        epochs: Number of full passes over ``dataloader``.

    Progress is printed by ``report`` on the module-level ``dataset`` every
    100 batches and once more at the end of each epoch.
    """
    for epoch in range(epochs):
        # Put the model in training mode explicitly, so that an earlier
        # ``model.eval()`` call (for example from an evaluation run) does not
        # silently carry over into training.
        model.train()
        for (i, (x, y_true)) in enumerate(dataloader):
            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()
            y_out = model(x)
            loss = loss_fn(y_out, y_true)
            loss.backward()
            optimizer.step()
            if i % 100 == 0:
                report(model, dataset, loss_fn)
        report(model, dataset, loss_fn)
        
def report(model, dataset, loss_fn):
    """Print the loss and accuracy of ``model`` over the whole of ``dataset``.

    The dataset is served as one batch containing every sample, and the
    model is run on it with gradient tracking disabled.

    Args:
        model: Callable mapping a batch of inputs to per-class scores.
        dataset: Dataset of ``(input, label)`` pairs; must support ``len()``.
        loss_fn: Callable ``loss_fn(scores, labels)`` returning a scalar tensor.
    """
    n_samples = len(dataset)
    # A batch size equal to the dataset length makes the loader yield one batch.
    whole_set = torch.utils.data.DataLoader(dataset, batch_size=n_samples)
    with torch.no_grad():
        for inputs, labels in whole_set:
            scores = model(inputs)
            predicted = scores.argmax(dim=1)
            n_correct = (predicted == labels).sum().item()
            accuracy = n_correct / n_samples
            loss = loss_fn(scores, labels).item()
            print(f"Loss = {loss:.2f}, Accuracy = {accuracy:.2f}")

# One pass over the training data; every line printed below comes from report().
train(model, optimizer, loss_fn, epochs=1)

Loss = 2.28, Accuracy = 0.22
Loss = 0.51, Accuracy = 0.86
Loss = 0.38, Accuracy = 0.89
Loss = 0.33, Accuracy = 0.90
Loss = 0.31, Accuracy = 0.91
Loss = 0.29, Accuracy = 0.91
Loss = 0.26, Accuracy = 0.92
Loss = 0.25, Accuracy = 0.93
Loss = 0.25, Accuracy = 0.93
Loss = 0.23, Accuracy = 0.93
Loss = 0.22, Accuracy = 0.93
Loss = 0.20, Accuracy = 0.94
Loss = 0.19, Accuracy = 0.95
Loss = 0.19, Accuracy = 0.94
Loss = 0.18, Accuracy = 0.95
Loss = 0.17, Accuracy = 0.95
Loss = 0.16, Accuracy = 0.95
Loss = 0.16, Accuracy = 0.95

5 Model Evaluation

# MNIST test split (train=False), read from the same root directory and passed
# through the same `transform` as the training data.
test_dataset = torchvision.datasets.MNIST('/workspace/datasets/', train=False, transform=transform)
# Loader that serves the whole test split as a single batch.
# NOTE(review): nothing in the visible code reads `test_dataloader`; report()
# builds its own loader from the dataset it is given.
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=len(test_dataset))

# Switch the model to evaluation mode before measuring it. MyModel contains only
# Flatten/Linear/ReLU layers, so this is a good-practice step, not a
# behavioural change for this particular network.
model.eval()

# Loss and accuracy on the test split, which was not used for training.
report(model, test_dataset, loss_fn)

Loss = 0.16, Accuracy = 0.95

6 Model In Production

# List every entry of the model's state dict together with its tensor shape.
for name, tensor in model.state_dict().items():
    print(name, tensor.shape)

net.1.weight torch.Size([100, 784])
net.1.bias torch.Size([100])
net.3.weight torch.Size([10, 100])
net.3.bias torch.Size([10])

6.1 Saving model to disk

# Persist only the state dict (the parameter tensors), not the model object
# itself; restoring it therefore requires constructing a MyModel first.
torch.save(model.state_dict(), './model_params.pt')

6.2 Loading model from disk

# Read the saved state dict back from disk.
# NOTE(review): torch.load unpickles the file, so only load checkpoints you
# trust, or pass weights_only=True if the installed PyTorch version supports it.
model_state_dict = torch.load('./model_params.pt')

# Build a fresh MyModel and copy the saved parameters into it.
model_in_prod = MyModel()
model_in_prod.load_state_dict(model_state_dict)

<All keys matched successfully>

# Echo the module's repr to check the architecture of the restored model.
model_in_prod

MyModel(
  (net): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=100, bias=True)
    (2): ReLU()
    (3): Linear(in_features=100, out_features=10, bias=True)
  )
)

# The reloaded model should reproduce the test metrics of the model it was saved from.
report(model_in_prod, test_dataset, loss_fn)

Loss = 0.16, Accuracy = 0.95