Quick Pytorch template 1
Posted on June 2, 2019
Tags: machinelearning
1 Summary
Build a neural network that outputs a binary classification, YES(1) or NO(0), of whether we can create a target sum from a bag of coins with given values (each coin value usable unlimited times).
e.g.
coins = {1,2,5} target=14
OUTPUT: 1
since 5+5+2+2=14
- We will fix the target sum at 250 and the bag length at 3, meaning each bag holds exactly 3 coin values (duplicates allowed).
- We will fix the possible coin values to integers between 1 and 239 (we draw them with torch.randint(low=1, high=240), whose high bound is exclusive).
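To make the fixed setup concrete, here are two hand-checked example bags (illustrative values only, not drawn from the dataset we build below):

bag1 = [50, 100, 7]     # 50 + 100 + 100 = 250, so the label is 1
bag2 = [240, 239, 238]  # one coin is at most 240, two coins are at least 476; 250 is unreachable, label 0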
2 Imports
## Standard libraries
import os
import math
import numpy as np
import time
## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_formats = ['pdf','svg']
from matplotlib.colors import to_rgba
import seaborn as sns
sns.set()
## Progress bar
from tqdm.notebook import tqdm
import torch
print("Using torch", torch.__version__)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
3 Coin Change problem
def _get_change_making_matrix(set_of_coins, r: int):
    m = [[0 for _ in range(r + 1)] for _ in range(len(set_of_coins) + 1)]
    for i in range(1, r + 1):
        m[0][i] = float('inf')  # By default there is no way of making change
    return m
def change_making(coins, n: int):
    """This function assumes that all coins are available infinitely.
    n is the number to obtain with the fewest coins.
    coins is a list or tuple with the available denominations.
    """
    m = _get_change_making_matrix(coins, n)
    for c, coin in enumerate(coins, 1):
        for r in range(1, n + 1):
            # Just use the coin
            if coin == r:
                m[c][r] = 1
            # coin cannot be included.
            # Use the previous solution for making r,
            # excluding coin
            elif coin > r:
                m[c][r] = m[c - 1][r]
            # coin can be used.
            # Decide which one of the following solutions is the best:
            # 1. Using the previous solution for making r (without using coin).
            # 2. Using the previous solution for making r - coin (without
            #    using coin) plus this 1 extra coin.
            else:
                m[c][r] = min(m[c - 1][r], 1 + m[c][r - coin])
    return m[-1][-1]

change_making([1,2,5],14)
4
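When the target cannot be made at all, the float('inf') sentinel from the matrix initialization propagates to the result; this is exactly what the torch variant below tests for. For example, 7 cannot be made from {4, 6}:

change_making([4,6],7)

inf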
3.1 Torch variant coin change
# coinChange variant that takes a coin tensor and returns a tensor label
def torchChangeMake(coin, n):
    possible = 1 if change_making(coin.tolist(), n) != float('inf') else 0
    return torch.Tensor([possible])
torchChangeMake(torch.randint(1,7,(3,)),7)
torch.randint(1,7,(3,))
tensor([2, 5, 2])
torchChangeMake(torch.randint(1,7,(3,)),7)
tensor([1.])
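buildlabel below calls torchChangeMake once per bag. If we ever want labels for a whole batch of bags at once, a small helper sketch would look like this (torchChangeMakeBatch is just an illustrative name, not used later):

def torchChangeMakeBatch(bags, n):
    # bags: integer tensor of shape [num_bags, bag_size]
    # returns: float tensor of shape [num_bags] holding the 1./0. labels
    return torch.cat([torchChangeMake(bag, n) for bag in bags], 0)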
4 Formatting Dataset for Dataloader
- The goal is to get a sense of how to build our dataset.
- We won't be using the code here directly, but we will use these concepts to build the dataloader.
4.1 Build our dataset INPUTS
- In our coin change problem, these are the bags of coins.
self.data, which we will implement in the next section inside def buildinput(self):, will follow this pattern:
torch.randint(low=1,high=5,size=(10,3))
tensor([[2, 1, 3],
[2, 1, 3],
[1, 4, 1],
[4, 1, 4],
[3, 2, 2],
[1, 4, 2],
[3, 1, 3],
[4, 1, 4],
        [2, 4, 2],
        [3, 2, 3]])
4.2 Build our dataset OUTPUTS
- In our coin change problem, a 1 or 0 represents whether it is possible to make 40 from each of the bags from the previous section.
self.label, which we will implement in the next section inside def buildlabel(self):, will follow this pattern:
acc = torch.empty((0,))
for i in torch.randint(low=1,high=30,size=(10,3)):
    acc = torch.cat((acc,torchChangeMake(i,40)),0)
    print(torchChangeMake(i,40))
tensor([1.])
tensor([1.])
tensor([1.])
tensor([1.])
tensor([1.])
tensor([1.])
tensor([0.])
tensor([0.])
tensor([1.])
tensor([1.])
acc is the output data: the loop above merged the per-bag tensors into one tensor.
print(acc)
Notice that acc is one flat 1-D tensor of labels, not a list of [1]-shaped tensors.
tensor([1., 1., 1., 1., 1., 1., 0., 0., 1., 1.])
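If a column shape were ever needed (it is not, below), unsqueeze adds the extra dimension; a quick optional shape check:

print(acc.shape)               # torch.Size([10]) -- one label per bag
print(acc.unsqueeze(1).shape)  # torch.Size([10, 1]) -- column form, if ever needed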
5 Creating your Dataset class
- Create a class that extends data.Dataset to make it compatible with torch's dataloader.
- We must fill in the 2 class attributes below:
self.data
- In our coin change problem this is a list of lists, AKA a list of bags of coins.
self.label
- In our coin change problem this is a list of YES(1) or NO(0).
import torch.utils.data as data

class MainDataset(data.Dataset):
    def __init__(self, size):
        super().__init__()
        self.size = size  # size is the observation count, AKA number of inputs, AKA rows of a database
        self.generateData()

    def generateData(self):
        self.buildinput()
        self.buildlabel()  # label AKA the output AKA expected result

    def buildinput(self):  # We built this in the previous section
        FeaturesCount = 3
        data = torch.randint(low=1, high=240, size=(self.size, FeaturesCount))
        self.data = data

    def buildlabel(self):  # We built this in the previous section
        acc = torch.empty((0,))
        for i in self.data:
            minCoinsLabel = torchChangeMake(i, 250)
            acc = torch.cat((acc, minCoinsLabel), 0)  # 0 means append on dim 0, like a typical array
        self.label = acc

    def __len__(self):
        # Number of data points we have. Alternatively self.data.shape[0], or self.label.shape[0]
        return self.size

    def __getitem__(self, idx):
        # Return the idx-th data point of the dataset
        # If we have multiple things to return (data point and label), we can return them as a tuple
        data_point = self.data[idx]
        data_label = self.label[idx]
        return data_point, data_label
# ONLY THE TEMPLATE, DO NOT COPY AND USE THIS
class templateDataset(data.Dataset):
    def __init__(self, size):
        super().__init__()
        self.size = size
        self.generateData()

    def generateData(self):
        self.data = ...   # fill in: inputs
        self.label = ...  # fill in: labels

    def __len__(self):
        # Number of data points we have. Alternatively self.data.shape[0], or self.label.shape[0]
        return self.size

    def __getitem__(self, idx):
        # Return the idx-th data point of the dataset
        # If we have multiple things to return (data point and label), we can return them as a tuple
        data_point = self.data[idx]
        data_label = self.label[idx]
        return data_point, data_label
dataset = MainDataset(size=200)
print("Size of dataset:", len(dataset))
print("Data point 0:", dataset[0])
print("Data point 0:", dataset[0])
Size of dataset: 200
Data point 0: (tensor([218, 6, 90]), tensor(0.))
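Optionally, we can sanity-check the class balance of the generated labels, i.e. what fraction of bags can make 250:

print("Fraction of YES labels:", dataset.label.mean().item())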
5.1 Torch Dataloader
We are not using the dataloader yet, just investigating it in this section
- Since we built our dataset class based on torch’s spec, loading it into torch’s dataloader is trivial.
data_loader = data.DataLoader(dataset, batch_size=8, shuffle=True)
# next(iter(...)) fetches the first batch of the data loader
# If shuffle is True, this will return a different batch every time we run this cell
# For iterating over the whole dataset, we can simply use "for batch in data_loader: ..."
data_inputs, data_labels = next(iter(data_loader))
# The shape of the outputs are [batch_size, d_1,...,d_N] where d_1,...,d_N are the
# dimensions of the data point returned from the dataset class
print("Data inputs", data_inputs.shape, "\n", data_inputs)
print("Data labels", data_labels.shape, "\n", data_labels)
Data inputs torch.Size([8, 3])
tensor([[177, 222, 63],
[239, 187, 20],
[224, 15, 76],
[110, 177, 134],
[ 67, 20, 173],
[ 80, 58, 116],
[ 35, 25, 212],
[165, 74, 239]])
Data labels torch.Size([8])
tensor([0., 0., 0., 0., 0., 0., 1., 0.])
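As the comment above says, a plain for-loop walks the whole dataset batch by batch; an optional check that we indeed get 200 / 8 = 25 batches:

num_batches = 0
for batch_inputs, batch_labels in data_loader:
    num_batches += 1  # each iteration yields one batch of 8 points
print(num_batches)    # 25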
6 Building the Neuron
import torch.nn as nn
import torch.nn.functional as F
- Simple Neuron
- __init__ is where we define the
  - FEATURE dimensions: num_inputs
  - OUTPUT dimensions: num_outputs
  - hidden or bias dimensions: num_hidden
  - layer transition functions (linear + nonlinear activation)
- forward is where we apply those functions
Example:
- In the coin change problem, since we use a fixed bag of 3 coins, num_inputs=3.
- Our output is a single yes/no, which is 1 dimension, so num_outputs=1.
- num_hidden can be any reasonable value; we pick 1.
class ExampleNeuron(nn.Module):
    def __init__(self, num_inputs, num_hidden, num_outputs):
        super().__init__()
        self.linear1 = nn.Linear(num_inputs, num_hidden)
        self.act_fn = nn.Tanh()
        self.linear2 = nn.Linear(num_hidden, num_outputs)

    def forward(self, x):
        x = self.linear1(x)
        x = self.act_fn(x)
        x = self.linear2(x)
        return x

model = ExampleNeuron(num_inputs=3, num_hidden=1, num_outputs=1)
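A quick optional shape check on the untrained model; it returns raw logits, since the sigmoid is applied inside the loss module in the next section:

dummy = torch.randn(8, 3)   # a fake batch of 8 bags
print(model(dummy).shape)   # torch.Size([8, 1]) -- raw logits, one per bag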
7 Loss function
loss_module = nn.BCEWithLogitsLoss()
# loss_module = nn.L1Loss()
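nn.BCEWithLogitsLoss applies the sigmoid itself before computing binary cross-entropy, which is more numerically stable than chaining nn.Sigmoid and nn.BCELoss by hand; a small optional equivalence check:

logits = torch.tensor([0.5, -1.2, 2.0])
targets = torch.tensor([1., 0., 1.])
a = nn.BCEWithLogitsLoss()(logits, targets)
b = nn.BCELoss()(torch.sigmoid(logits), targets)
print(torch.allclose(a, b))  # True: same loss, computed more stably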
8 Gradient Descent
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
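For intuition: plain SGD (no momentum) moves each parameter a small step against its gradient, p ← p − lr · p.grad. A sketch of what optimizer.step() does under the hood (not code we need to run):

with torch.no_grad():
    for p in model.parameters():
        if p.grad is not None:
            p -= 0.1 * p.grad  # lr = 0.1, the same rule optimizer.step() applies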
9 Training - Putting it all together
train_dataset = MainDataset(size=200)
train_data_loader = data.DataLoader(train_dataset, batch_size=12, shuffle=True)
def train_model(model, optimizer, data_loader, loss_module, num_epochs=100):
    # Set model to train mode
    model.train()

    # Training loop
    for epoch in range(num_epochs):
        for data_inputs, data_labels in data_loader:
            ## Step 1: Move input data to device (only strictly necessary if we use GPU)
            data_inputs = data_inputs.to(device)
            data_labels = data_labels.to(device)

            ## Step 2: Run the model on the input data
            preds = model(data_inputs.float())
            preds = preds.squeeze(dim=1)  # Output is [Batch size, 1], but we want [Batch size]

            ## Step 3: Calculate the loss
            loss = loss_module(preds, data_labels)

            ## Step 4: Perform backpropagation
            # Before calculating the gradients, we need to ensure that they are all zero.
            # The gradients would not be overwritten, but actually added to the existing ones.
            optimizer.zero_grad()
            # Perform backpropagation
            loss.backward()

            ## Step 5: Update the parameters
            optimizer.step()

train_model(model.to(device), optimizer, train_data_loader, loss_module)
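If we want to watch training converge, one optional variant accumulates the mean loss per epoch and prints it periodically (a sketch, not part of the run above):

def train_model_verbose(model, optimizer, data_loader, loss_module, num_epochs=100):
    model.train()
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for data_inputs, data_labels in data_loader:
            data_inputs, data_labels = data_inputs.to(device), data_labels.to(device)
            preds = model(data_inputs.float()).squeeze(dim=1)
            loss = loss_module(preds, data_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        if epoch % 10 == 0:
            print(f"Epoch {epoch}: mean loss {epoch_loss / len(data_loader):.4f}")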
10 Evaluation
test_dataset = MainDataset(size=500)
# drop_last -> Don't drop the last batch although it is smaller than 128
test_data_loader = data.DataLoader(test_dataset, batch_size=128, shuffle=False, drop_last=False)
def eval_model(model, data_loader):
    model.eval()  # Set model to eval mode
    true_preds, num_preds = 0., 0.

    with torch.no_grad():  # Deactivate gradients for the following code
        for data_inputs, data_labels in data_loader:
            # Determine prediction of model on dev set
            data_inputs, data_labels = data_inputs.to(device), data_labels.to(device)
            preds = model(data_inputs.float())
            preds = preds.squeeze(dim=1)
            preds = torch.sigmoid(preds)  # Sigmoid to map predictions between 0 and 1
            pred_labels = (preds >= 0.5).long()  # Binarize predictions to 0 and 1

            # Keep records of predictions for the accuracy metric (true_preds=TP+TN, num_preds=TP+TN+FP+FN)
            true_preds += (pred_labels == data_labels).sum()
            num_preds += data_labels.shape[0]

    acc = true_preds / num_preds
    print(f"Accuracy of the model: {100.0*acc:4.2f}%")

eval_model(model, test_data_loader)
Accuracy of the model: 79.20%
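As a last optional spot check, we can compare the trained model's prediction on a fresh random bag against the exact dynamic-programming answer:

bag = torch.randint(low=1, high=240, size=(1, 3))
with torch.no_grad():
    prob = torch.sigmoid(model(bag.float().to(device))).item()
print("model:", int(prob >= 0.5), "| exact:", int(torchChangeMake(bag[0], 250).item()))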