预备
import numpy as np
def zero_pad(image, pad_height, pad_width):
    """Pad a 2-D image with zeros on all four sides.

    Args:
        image: 2-D numpy array of shape (H, W).
        pad_height: number of zero rows added above AND below.
        pad_width: number of zero columns added left AND right.

    Returns:
        float64 array of shape (H + 2*pad_height, W + 2*pad_width) with the
        original image centered and zeros around it.
    """
    H, W = image.shape
    # np.zeros defaults to float64, matching the original implementation's
    # output dtype regardless of the input dtype.
    out = np.zeros((H + 2 * pad_height, W + 2 * pad_width))
    # One vectorized slice assignment replaces the original per-element
    # double Python loop — same result, C-speed copy.
    out[pad_height:pad_height + H, pad_width:pad_width + W] = image
    return out
conv_nested
使用最基础的四重嵌套for循环进行计算,分别遍历图片和卷积核
def conv_nested(image, kernel):
    """Naive 2-D convolution via four nested loops.

    Output pixel (r, c) is sum over (u, v) of
    kernel[u, v] * image[r - u + Hk//2, c - v + Wk//2],
    treating out-of-bounds image samples as zero.

    Args:
        image: 2-D array of shape (Hi, Wi).
        kernel: 2-D array of shape (Hk, Wk).

    Returns:
        float64 array of shape (Hi, Wi).
    """
    Hi, Wi = image.shape
    Hk, Wk = kernel.shape
    out = np.zeros((Hi, Wi))
    off_h, off_w = Hk // 2, Wk // 2
    for r in range(Hi):
        for c in range(Wi):
            acc = 0
            for u in range(Hk):
                src_r = r - u + off_h
                # Whole kernel row falls outside the image: skip early.
                if not 0 <= src_r < Hi:
                    continue
                for v in range(Wk):
                    src_c = c - v + off_w
                    if 0 <= src_c < Wi:
                        acc += kernel[u, v] * image[src_r, src_c]
            out[r, c] = acc
    return out
conv_fast
题目给了提示使用np.flip()和np.sum()函数简化计算,将四重for循环减少为两重,同时也不需要去计算卷积核和图片在计算时对应的位置
def conv_fast(image, kernel):
    """2-D convolution with zero padding, two loops instead of four.

    The kernel is flipped on both axes once up front, so each output pixel
    is a single element-wise multiply of the flipped kernel against the
    matching window of the zero-padded image, reduced with np.sum.

    Args:
        image: 2-D array of shape (Hi, Wi).
        kernel: 2-D array of shape (Hk, Wk).

    Returns:
        float64 array of shape (Hi, Wi).
    """
    Hi, Wi = image.shape
    Hk, Wk = kernel.shape
    out = np.zeros((Hi, Wi))
    # [::-1, ::-1] flips both axes — same as np.flip(np.flip(k, 0), 1).
    flipped = kernel[::-1, ::-1]
    # Zero-pad by half the kernel size so every window stays in bounds.
    padded = np.pad(image, ((Hk // 2, Hk // 2), (Wk // 2, Wk // 2)),
                    mode='constant')
    for r in range(Hi):
        for c in range(Wi):
            window = padded[r:r + Hk, c:c + Wk]
            out[r, c] = np.sum(window * flipped)
    return out
conv_faster
将pad过后的图像矩阵每次要和卷积核进行元素相乘的部分单独抽出来作为新矩阵的一行的数据,依次填充得到一个新的矩阵大小为
$(H_i * W_i,\; H_k * W_k)$,然后将卷积核也reshape
为
$(H_k * W_k,\; 1)$,使用矩阵相乘可以大大缩短计算时间。GPU对矩阵计算进行了优化。
def conv_faster(image, kernel):
    """2-D convolution as a single matrix-vector product (im2col).

    Each Hk x Wk window of the zero-padded image becomes one row of a
    (Hi*Wi, Hk*Wk) matrix; multiplying by the flipped kernel reshaped to
    (Hk*Wk, 1) computes every output pixel in one BLAS call.

    Bug fixed vs. the original: the flipped kernel was bound to
    ``kernel_filp`` but used as ``kernel_flip``, so the function raised
    NameError on every call.

    Args:
        image: 2-D array of shape (Hi, Wi).
        kernel: 2-D array of shape (Hk, Wk).

    Returns:
        float64 array of shape (Hi, Wi).
    """
    Hi, Wi = image.shape
    Hk, Wk = kernel.shape
    padded = np.pad(image, ((Hk // 2, Hk // 2), (Wk // 2, Wk // 2)),
                    mode='constant')
    kernel_flipped = np.flip(np.flip(kernel, 0), 1)
    mat = np.zeros((Hi * Wi, Hk * Wk))
    for i in range(Hi * Wi):
        row, col = divmod(i, Wi)
        # Flatten the window that this output pixel convolves against.
        mat[i, :] = padded[row:row + Hk, col:col + Wk].reshape(-1)
    return mat.dot(kernel_flipped.reshape(Hk * Wk, 1)).reshape(Hi, Wi)