首先是卷积运算。
在 NumPy 中通过 im2col 算法实现。
将输入的每个滑动窗口与每个卷积核分别展平成向量,卷积即可转化为一次矩阵乘法,其结果重新整形后就是输出。
import numpy as np
def im2col(image, size, stride=1):
    """Unfold every sliding window of a batch of images into a matrix row.

    Args:
        image: Array of shape (N, H, W, C): batch, height, width, channels.
        size: Side length of the square window.
        stride: Step between neighbouring windows along both spatial axes.

    Returns:
        A float64 array of shape (N * out_h * out_w, size * size * C), where
        out_h = (H - size) // stride + 1 and out_w = (W - size) // stride + 1.
        Rows are ordered by (sample, window row, window column). Within a
        row, values are ordered by (kernel row, kernel column, channel) with
        the channel index varying fastest.
    """
    N, H, W, C = image.shape
    out_h = (H - size) // stride + 1
    out_w = (W - size) // stride + 1

    # One slot per (sample, window position, kernel offset, channel).
    windows = np.zeros((N, out_h, out_w, size, size, C))

    # Iterate over the size * size kernel offsets instead of the
    # out_h * out_w window positions: each strided slice gathers the pixel
    # at offset (kh, kw) of *every* window in one vectorised copy.
    h_span = stride * out_h
    w_span = stride * out_w
    for kh in range(size):
        for kw in range(size):
            windows[:, :, :, kh, kw, :] = image[
                :, kh:kh + h_span:stride, kw:kw + w_span:stride, :
            ]

    # Explicit target shape (no -1) so empty results reshape cleanly too.
    return windows.reshape(N * out_h * out_w, size * size * C)
def conv_forward(X, W, stride=1, padding='same'):
    """Compute a 2-D convolution forward pass as a single matrix product.

    Each input window is unfolded into a row by ``im2col`` and multiplied
    with the flattened kernels. The kernels are applied as stored (they are
    not flipped), i.e. this is a cross-correlation.

    Args:
        X: Input array of shape (N, H, W, C).
        W: Kernel array of shape (filters, size, size, C); kernels must be
            square.
        stride: Step of the sliding window along both spatial axes.
        padding: ``'same'`` zero-pads the spatial axes so that the output
            height/width is ceil(input / stride). Any other value applies
            no padding.

    Returns:
        Array of shape (N, out_h, out_w, filters).

    Raises:
        ValueError: If the kernels are not square.
    """
    filters, size, kernel_w, _ = W.shape
    if size != kernel_w:
        raise ValueError(
            'conv_forward only supports square kernels, got %dx%d'
            % (size, kernel_w)
        )

    if padding == 'same':
        # 'same' needs size - 1 padded pixels in total per spatial axis.
        # For odd kernels this is the usual size // 2 on each side; for even
        # kernels the extra pixel goes at the end. Padding size // 2 on both
        # sides would make the output one pixel too large for even kernels.
        pad_before = (size - 1) // 2
        pad_after = size - 1 - pad_before
        X = np.pad(
            X,
            ((0, 0), (pad_before, pad_after), (pad_before, pad_after), (0, 0)),
            'constant',
        )

    N, H_input, W_input, _ = X.shape
    out_h = (H_input - size) // stride + 1
    out_w = (W_input - size) // stride + 1

    col = im2col(X, size, stride)
    # Flatten each kernel in the same (row, col, channel) order im2col uses.
    kernel_matrix = W.reshape(filters, -1).transpose()
    logit = np.dot(col, kernel_matrix)
    return logit.reshape((N, out_h, out_w, filters))
def pooling(X, pool_size, stride=2, pooling='max'):
    """Apply max or average pooling independently to every channel.

    Args:
        X: Input array of shape (N, H, W, C).
        pool_size: Side length of the square pooling window.
        stride: Step of the pooling window along both spatial axes.
        pooling: ``'max'`` for max pooling or ``'avg'`` for average pooling.

    Returns:
        Array of shape (N, out_h, out_w, C) where
        out_h = (H - pool_size) // stride + 1 and out_w likewise.

    Raises:
        ValueError: If ``pooling`` is neither ``'max'`` nor ``'avg'``.
    """
    if pooling == 'max':
        reduce_window = np.max
    elif pooling == 'avg':
        reduce_window = np.average
    else:
        # Fail loudly instead of silently returning None.
        raise ValueError(
            "pooling must be 'max' or 'avg', got %r" % (pooling,)
        )

    N, H_input, W_input, C = X.shape
    out_h = (H_input - pool_size) // stride + 1
    out_w = (W_input - pool_size) // stride + 1

    # [N*out_size, pool_size*pool_size*C]
    col = im2col(X, pool_size, stride)
    # Each im2col row is ordered (kernel row, kernel col, channel) with the
    # channel varying fastest, so the channel axis has to be the LAST axis
    # after reshaping; putting it first would mix values of different
    # channels into one pooling window.
    windows = col.reshape((col.shape[0], pool_size * pool_size, C))
    pooled = reduce_window(windows, axis=1)
    return pooled.reshape((N, out_h, out_w, C))
def main():
    """Run one convolution forward pass on random data as a smoke test."""
    # Batch of 10 random 224x224 inputs with 3 channels.
    batch = np.random.randn(10, 224, 224, 3)
    # 16 random 3x3 kernels over 3 input channels.
    kernels = np.random.randn(16, 3, 3, 3)
    # Default stride and padding; the result is not used any further.
    logit = conv_forward(batch, kernels)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()