Simulate Linear Regression with Torch
Posted on June 2, 2019
Tags: machinelearning
1 Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the Boston housing data; the regression below predicts `medv` from `lstat`.
Boston = pd.read_csv("../dataset/Boston.csv")
x = Boston['lstat']  # predictor column
y = Boston['medv']   # target column
import torch
print("Using torch", torch.__version__)
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
Using torch 1.11.0
# Convert the predictor and target columns to float32 tensors.
X = torch.FloatTensor(x)
Y = torch.FloatTensor(y)
print(X.shape,Y.shape)
torch.Size([506]) torch.Size([506])
2 Creating your Dataset class
import torch.utils.data as data
class myDataset(data.Dataset):
    """Dataset that pairs each entry of the module-level tensor ``X`` with the
    matching entry of ``Y``."""

    def __init__(self):
        super().__init__()
        self.generateData()

    def generateData(self):
        """Bind the module-level tensors ``X`` (inputs) and ``Y`` (labels)."""
        self.data = X
        self.label = Y
        self.size = len(Y)

    def __len__(self):
        # Number of data points we have. Alternatively self.data.shape[0], or self.label.shape[0]
        return self.size

    def __getitem__(self, idx):
        # Return the idx-th data point of the dataset
        # If we have multiple things to return (data point and label), we can return them as tuple
        data_point = self.data[idx]
        data_label = self.label[idx]
        return data_point, data_label
# Instantiate the dataset and check how many samples it holds.
MainDataset = myDataset()
len(MainDataset)
506
MainDataset[3]
(tensor(2.9400), tensor(33.4000))
3 Building the Neuron
import torch.nn as nn
import torch.nn.functional as F
class ExampleNeuron(nn.Module):
    """Two stacked linear layers with the activation between them disabled.

    Without a non-linearity the composition of the two layers is still an
    affine map of the input, i.e. a linear-regression model.
    """

    def __init__(self, num_inputs, num_hidden, num_outputs):
        super().__init__()
        self.linear1 = nn.Linear(num_inputs, num_hidden)
        # self.act_fn = nn.Tanh()
        self.linear2 = nn.Linear(num_hidden, num_outputs)

    def forward(self, x):
        """Apply ``linear1`` then ``linear2`` to ``x`` and return the result."""
        x = self.linear1(x)
        # x = self.act_fn(x)
        x = self.linear2(x)
        return x
model = ExampleNeuron(num_inputs=1, num_hidden=2, num_outputs=1)
4 Loss function
# Mean squared error between predictions and labels.
loss_module = nn.MSELoss()
5 Gradient Descent
# Stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, weight_decay=1e-6)
6 Training - Putting it all together
train_dataset = MainDataset
# shuffle=True: samples are visited in a different random order on every pass.
train_data_loader = data.DataLoader(train_dataset, shuffle=True)
def train_model(model, optimizer, data_loader, loss_module, num_epochs=100):
    """Train ``model`` in place by running ``num_epochs`` passes over ``data_loader``.

    Args:
        model: Module to optimise; its parameters are updated in place.
        optimizer: Optimizer constructed over ``model.parameters()``.
        data_loader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        loss_module: Callable ``loss_module(preds, labels)`` returning a scalar
            tensor that supports ``backward()``.
        num_epochs: Number of full passes over ``data_loader``.

    Returns:
        None. The trained weights live in ``model``.
    """
    # Set model to train mode
    model.train()

    # Batches must sit on the same device as the model's parameters. The model
    # is never moved inside this function, so follow the model rather than a
    # global device; otherwise a CUDA host would mix CPU weights with GPU data.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    # Training loop
    for _ in range(num_epochs):
        for data_inputs, data_labels in data_loader:
            ## Step 1: Move input data to the model's device (only strictly necessary if we use GPU)
            if target_device is not None:
                data_inputs = data_inputs.to(target_device)
                data_labels = data_labels.to(target_device)

            ## Step 2: Run the model on the input data
            preds = model(data_inputs.float())

            ## Step 3: Calculate the loss
            loss = loss_module(preds, data_labels)

            ## Step 4: Perform backpropagation
            # Before calculating the gradients, we need to ensure that they are all zero.
            # The gradients would not be overwritten, but actually added to the existing ones.
            optimizer.zero_grad()
            # Perform backpropagation
            loss.backward()

            ## Step 5: Update the parameters
            optimizer.step()
train_model(model, optimizer, train_data_loader, loss_module)
- The function call
train_model
mutates (i.e. trains) the model
object in place.
7 Prediction of single example
- To make a prediction, simply pass an input tensor into
model(..)
model(torch.FloatTensor([14.1]))
print("original datapoint: ",MainDataset[150],"\n")
singleinput = torch.FloatTensor([14.1])
singleprediction = model(singleinput) #Really this simple to make a prediction
print(f" input:{singleinput}\n prediction:{singleprediction}")
original datapoint: (tensor(14.1000), tensor(21.5000))
input:tensor([14.1000]) prediction:tensor([16.9899], grad_fn=<AddBackward0>)
The input 14.1 has a true value of 21.5, while our prediction is 16.9899.
8 Plotting list of predictions
BE VERY CAREFUL: train_data_loader
is shuffled (shuffle=True).
Referring to the code block below:
data_inputs
is randomly ordered.
- A BAD naive append would be to create only a list of predictions (VERSUS a list of tuples that pair each prediction with its input).
- If you plot the original ordered inputs against the unordered predictions from the naive append, you have made a mistake: each prediction is paired with the wrong input.
- An alternative solution would be to make another data_loader with
shuffle=False
and loop over that:
visualize_data_loader = data.DataLoader(train_dataset, shuffle=False)
acc = []  # list of tuples [(x5,pred_y5),(x2,pred_y2),(x7,pred_y7)..]
for data_inputs, data_labels in train_data_loader:  # REMEMBER it is shuffled AKA unordered
    unordered_pred = model(data_inputs.float()).detach().numpy()
    unordered_input = data_inputs.detach().numpy()
    # Important to pair BOTH the unordered input and its unordered prediction
    acc.append((unordered_input, unordered_pred))
    # acc.append(unordered_pred)  # BAD naive append
# zip(*acc) is the unzip operation; extracts [(x1,y1),(x2,y2),...] => [x1,x2..] [y1,y2..]
unorderedX, unorderedY = zip(*acc)
plt.scatter(X, Y)
plt.scatter(unorderedX, unorderedY)

BAD
#These X and Y are ordered
plt.scatter(X,Y)
#Very BAD: X is ordered but unorderedY follows the shuffled loader order
plt.scatter(X,unorderedY)
