CNN Convolution and Pooling
Posted on June 2, 2019
Tags: machinelearning
1 Imports
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
# Report the installed PyTorch version.
print("Using torch", torch.__version__)
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

from torchvision import transforms
from PIL import Image

# Pipeline that converts a PIL image to greyscale.
trans_GreyScale = transforms.Compose([
    transforms.Grayscale()
])
# Keep the untouched image as well: the multi-channel section reuses imgRaw.
imgRaw = Image.open("sample.png")
img = trans_GreyScale(imgRaw)
from torchvision import datasets
# Fetch MNIST into the current working directory.
dataset = datasets.MNIST(root=Path.cwd(), download=True)
# Indexing yields an (image, label) pair; take sample 10 as the demo image.
img, label = dataset[10]
plt.imshow(img, cmap='gray')
2 Kernel(Filter) Convolution
2.1 Kernel(Filter) matrix
# 3x3 filter with a positive middle row between two negative rows, so it
# responds to horizontal lines. conv2d weights are laid out as
# (out_channels, in_channels, height, width); here that is (1, 1, 3, 3).
kernel = torch.tensor([[[[-1, -1, -1],
                         [ 2,  2,  2],
                         [-1, -1, -1]]]], dtype=torch.float)
2.2 Image to Tensor
# Pipeline that turns a PIL image into a torch tensor.
trans_Img2Tensor = transforms.Compose([
    transforms.ToTensor()
])
demo = trans_Img2Tensor(img)
2.3 Convolve
# Slide the kernel over the image one pixel at a time, with no border padding.
convImg_tensor = torch.nn.functional.conv2d(demo, kernel, stride=1, padding=0)
2.4 Tensor to Image
# Pipeline that turns a tensor back into a PIL image for display.
trans_Tensor2Img = transforms.Compose([
    transforms.ToPILImage()
])
# NOTE(review): the kernel has negative weights, so the convolved values are
# not confined to [0, 1]; consider clamping before conversion - confirm the
# rendering you want.
newImg = trans_Tensor2Img(convImg_tensor)
plt.imshow(newImg, cmap='gray')
3 Pooling
# Max pooling with a 2x2 window over the convolved image.
poolImg_tensor = torch.nn.functional.max_pool2d(convImg_tensor, kernel_size=2)
newpoolImg = trans_Tensor2Img(poolImg_tensor)
plt.imshow(newpoolImg, cmap='gray')
4 MultiChannel Convolution

# One filter made of three identical 3x3 slices, one per input channel.
kernel3 = torch.tensor([[[[-1, -1, -1],
                          [ 2,  2,  2],
                          [-1, -1, -1]],
                         [[-1, -1, -1],
                          [ 2,  2,  2],
                          [-1, -1, -1]],
                         [[-1, -1, -1],
                          [ 2,  2,  2],
                          [-1, -1, -1]]]],
                       dtype=torch.float)
# Ideally, if you built the tensor correctly above, the reshape below does nothing.
kernel3c = torch.reshape(kernel3, (1, 3, 3, 3))
# Shape = (out_channels, in_channels, height, width) = (1, 3, 3, 3).
# For a conv2d weight the first axis counts filters, not samples.
# Use the original (non-greyscale) image this time.
# NOTE(review): assumes sample.png has exactly 3 channels, matching the
# kernel's in_channels - confirm.
demo = trans_Img2Tensor(imgRaw)
convImg_tensor = torch.nn.functional.conv2d(demo, kernel3c)
Img3c = trans_Tensor2Img(convImg_tensor)
- Note that the output has a single channel, i.e. it is a grayscale image.
- This is expected: it is how convolution works.
- A multi-channel convolution sums the per-channel results, so a single filter collapses the 3 input channels into 1 output channel.
TFjs
- vertical filter
// Filter given as a flat list of values plus an explicit shape.
// The shape passed here is [3, 3, 3, 1].
const kernFilter = tf.tensor4d([-1,-1,-1,2,2,2,-1,-1,-1, -1,-1,-1,2,2,2,-1,-1,-1, -1,-1,-1,2,2,2,-1,-1,-1],[3,3,3,1],"float32")

// The flat form above gets converted into the nested form below.
// It has its own name because a `const` cannot be declared twice in one scope.
const kernFilterNested = tf.tensor4d([[[[-1],[-1],[-1]],
                                       [[2],[2],[2]],
                                       [[-1],[-1],[-1]]],
                                      [[[-1],[-1],[-1]],
                                       [[2],[2],[2]],
                                       [[-1],[-1],[-1]]],
                                      [[[-1],[-1],[-1]],
                                       [[2],[2],[2]],
                                       [[-1],[-1],[-1]]]])