Convolutional Neural Networks: Building a Simple CNN (Convolution, Pooling, Fully Connected Layers)

Reference: the "fish book" (Deep Learning from Scratch) by Koki Saitoh (斋藤康毅)

Network structure

[Figure: overall network architecture]
The network consists of convolution layers, ReLU activations, pooling layers, Dropout, and fully connected layers.

First, the initialization.
The hidden part is a small BP network made of fully connected layers. The image passes through the convolution layers to extract pixel features, which are weighted by the learned filters during forward propagation. After several convolutions the feature maps shrink. The sizes are computed layer by layer, in convolution-then-pooling order:
$$Conv_h = 1 + \frac{H + 2 \cdot pad - filter_h}{stride} \qquad (Conv_w \text{ likewise})$$

$$PoolOut_h = 1 + \frac{Conv_h - pool_h}{stride}$$

With the parameters given in the code, the output feature map sizes after the six convolution layers and three pooling layers are:

| Module | size, pad, stride | Feature Map Size |
| ------ | ----------------- | ---------------- |
| Conv   | 3, 1, 1           | 28               |
| Relu   |                   | 28               |
| Conv   | 3, 1, 1           | 28               |
| Relu   |                   | 28               |
| Pool   | 2, --, 2          | 14               |
| Conv   | 3, 1, 1           | 14               |
| Relu   |                   | 14               |
| Conv   | 3, 2, 1           | 16               |
| Relu   |                   | 16               |
| Pool   | 2, --, 2          | 8                |
| Conv   | 3, 1, 1           | 8                |
| Relu   |                   | 8                |
| Conv   | 3, 1, 1           | 8                |
| Relu   |                   | 8                |
| Pool   | 2, --, 2          | 4                |
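These numbers are easy to verify with two helper functions implementing the formulas above (a quick sketch; the helper names conv_out and pool_out are mine, not from the book):

```python
def conv_out(size, filter_size, pad, stride):
    # Conv_h = 1 + (H + 2*pad - filter_h) / stride
    return 1 + (size + 2 * pad - filter_size) // stride

def pool_out(size, pool_size, stride):
    # PoolOut_h = 1 + (Conv_h - pool_h) / stride
    return 1 + (size - pool_size) // stride

h = 28                    # MNIST input: 1 x 28 x 28
h = conv_out(h, 3, 1, 1)  # Conv1 -> 28
h = conv_out(h, 3, 1, 1)  # Conv2 -> 28
h = pool_out(h, 2, 2)     # Pool  -> 14
h = conv_out(h, 3, 1, 1)  # Conv3 -> 14
h = conv_out(h, 3, 2, 1)  # Conv4 (pad=2) -> 16
h = pool_out(h, 2, 2)     # Pool  -> 8
h = conv_out(h, 3, 1, 1)  # Conv5 -> 8
h = conv_out(h, 3, 1, 1)  # Conv6 -> 8
h = pool_out(h, 2, 2)     # Pool  -> 4
print(h)                  # 4
```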

As the data passes through the convolution layers, the channel count is progressively changed to filter_num, the number of filters in each layer (pooling preserves the channel count).
After the final pooling layer, the data enters the fully connected layers with an input size of $64 \cdot PoolOut_h \cdot PoolOut_w$.
The source code contains `pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4] + hidden_size_list)`. The neuron count feeding a fully connected layer is simply the size of the previous layer, while for a convolution layer the number of input neurons is channels (i.e. the previous layer's filter_num) × feature_map_h × feature_map_w. So I believe the original author slipped here; the list should read `1*28*28, 16*28*28, 16*28*28, 32*14*14, 32*16*16, 64*8*8, 64*4*4`.
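Either list only affects the He initialization scale $\sqrt{2/n}$, where $n$ is the fan-in. A small sketch comparing the scales the two conventions produce (plain NumPy; both arrays are transcribed from the discussion above, not copied verbatim from the repository):

```python
import numpy as np

# Book's convention: fan-in = input_channels * filter_h * filter_w
book_fan_in = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4])
# Convention argued for above: fan-in = input_channels * feature_map_h * feature_map_w
post_fan_in = np.array([1*28*28, 16*28*28, 16*28*28, 32*14*14, 32*16*16, 64*8*8, 64*4*4])

print(np.sqrt(2.0 / book_fan_in).round(3))  # He scales, book version
print(np.sqrt(2.0 / post_fan_in).round(3))  # He scales, this post's version
```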
Next comes building the layer stack, which needs little explanation. Note that the first fully connected layer takes an input of size 64*4*4, i.e. 64 channels of 4 × 4 feature maps.
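Concretely, the handoff from the last pooling layer to the first Affine layer is just a flatten (the book's Affine layer performs this reshape internally). A shape-only sketch with a hypothetical batch of 100:

```python
import numpy as np

x = np.random.randn(100, 64, 4, 4)  # output of the last pooling layer
x_flat = x.reshape(x.shape[0], -1)  # -> (100, 1024), i.e. 64*4*4 per sample
W7 = np.random.randn(64*4*4, 200)   # first fully connected weight (200 hidden units)
print((x_flat @ W7).shape)          # (100, 200)
```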
What remains is the weight-update machinery. Training is wrapped in a Trainer class, in Saitoh's usual style; the code is given below.

Network code:

```python
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # allow importing files from the parent directory
import pickle
import numpy as np
from collections import OrderedDict
from common.layers import *


class DeepConvNet:
    def __init__(self, hidden_size_list=[200, 10, 50, 20], input_dim=(1, 28, 28),
                 conv_param_1={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_2={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_3={'filter_num': 32, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_4={'filter_num': 32, 'filter_size': 3, 'pad': 2, 'stride': 1},
                 conv_param_5={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_6={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 output_size=10, dropout_ratio=0.5):
        self.hidden_size_list = hidden_size_list

        pre_node_nums = np.array([1*28*28, 16*28*28, 16*28*28, 32*14*14, 32*16*16, 64*8*8, 64*4*4] + hidden_size_list)

        weight_init_scales = np.sqrt(2.0 / pre_node_nums)  # He initialization, suited to the ReLU layers used here

        self.params = {}
        pre_channel_num = input_dim[0]

        for idx, conv_param in enumerate([conv_param_1, conv_param_2, conv_param_3, conv_param_4, conv_param_5, conv_param_6]):
            self.params['W' + str(idx+1)] = weight_init_scales[idx] * np.random.randn(conv_param['filter_num'], pre_channel_num, conv_param['filter_size'], conv_param['filter_size'])
            self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num'])
            pre_channel_num = conv_param['filter_num']  # the next layer's input channels = this layer's filter count

        self.params['W7'] = weight_init_scales[6] * np.random.randn(64*4*4, hidden_size_list[0])
        self.params['b7'] = np.zeros(hidden_size_list[0])
        self.params['W' + str(7 + len(hidden_size_list))] = weight_init_scales[len(weight_init_scales)-1] * np.random.randn(hidden_size_list[-1], output_size)
        self.params['b' + str(7 + len(hidden_size_list))] = np.zeros(output_size)

        index = 0
        # one Affine weight per remaining hidden layer (W7 above covers the first one)
        for idx in range(7, 7+len(hidden_size_list)-1):
            self.params['W' + str(idx+1)] = weight_init_scales[idx] * np.random.randn(hidden_size_list[index], hidden_size_list[index+1])
            self.params['b' + str(idx+1)] = np.zeros(hidden_size_list[index+1])
            index = index + 1

        self.layers = []
        self.layers.append(Convolution(self.params['W1'], self.params['b1'],
                                       conv_param_1['stride'], conv_param_1['pad']))  # 0
        self.layers.append(Relu())  # 1
        self.layers.append(Convolution(self.params['W2'], self.params['b2'],
                                       conv_param_2['stride'], conv_param_2['pad']))  # 2
        self.layers.append(Relu())  # 3
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))  # 4
        self.layers.append(Convolution(self.params['W3'], self.params['b3'],
                                       conv_param_3['stride'], conv_param_3['pad']))  # 5
        self.layers.append(Relu())  # 6
        self.layers.append(Convolution(self.params['W4'], self.params['b4'],
                                       conv_param_4['stride'], conv_param_4['pad']))  # 7
        self.layers.append(Relu())  # 8
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))  # 9
        self.layers.append(Convolution(self.params['W5'], self.params['b5'],
                                       conv_param_5['stride'], conv_param_5['pad']))  # 10
        self.layers.append(Relu())  # 11
        self.layers.append(Convolution(self.params['W6'], self.params['b6'],
                                       conv_param_6['stride'], conv_param_6['pad']))  # 12
        self.layers.append(Relu())  # 13
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))  # 14
        self.layers.append(Affine(self.params['W7'], self.params['b7']))  # 15
        self.layers.append(Relu())  # 16
        self.layers.append(Dropout(dropout_ratio))  # 17
        # Affine-Relu-Dropout block for each remaining hidden layer
        for idx in range(7, 7+len(hidden_size_list)-1):
            self.layers.append(Affine(self.params['W' + str(idx+1)], self.params['b' + str(idx+1)]))  # 18 21 24
            self.layers.append(Relu())  # 19 22 25
            self.layers.append(Dropout(dropout_ratio))  # 20 23 26
        self.layers.append(Affine(self.params['W' + str(7 + len(hidden_size_list))], self.params['b' + str(7 + len(hidden_size_list))]))  # 27
        self.layers.append(Dropout(dropout_ratio))

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):
        for layer in self.layers:
            if isinstance(layer, Dropout):
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x, train_flg=True)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1 : t = np.argmax(t, axis=1)

        acc = 0.0

        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx, train_flg=False)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)

        return acc / x.shape[0]

    def gradient(self, x, t):
        self.loss(x, t)

        dout = 1
        dout = self.last_layer.backward(dout)

        tmp_layers = self.layers.copy()
        tmp_layers.reverse()
        for layer in tmp_layers:
            dout = layer.backward(dout)

        grads = {}
        # indices of the layers holding W/b (valid for the default 4-element hidden_size_list)
        for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18, 21, 24, 27)):
            grads['W' + str(i+1)] = self.layers[layer_idx].dW
            grads['b' + str(i+1)] = self.layers[layer_idx].db

        return grads

    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18, 21, 24, 27)):
            self.layers[layer_idx].W = self.params['W' + str(i+1)]
            self.layers[layer_idx].b = self.params['b' + str(i+1)]
```
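Assuming common.layers from the book's repository is importable, the class can be smoke-tested on random data:

```python
import numpy as np

network = DeepConvNet()
x = np.random.randn(2, 1, 28, 28)   # two random "images"
t = np.array([3, 7])                # two fake labels

print(network.predict(x).shape)     # (2, 10) class scores
print(network.loss(x, t))           # scalar softmax cross-entropy
print(len(network.gradient(x, t)))  # 22 arrays: W1..W11 and b1..b11
```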

Trainer code:

```python
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # allow importing files from the parent directory
import numpy as np
from common.optimizer import *

class Trainer:
    """进行神经网络的训练的类
    """
    def __init__(self, network, x_train, t_train, x_test, t_test,
                 epochs=20, mini_batch_size=100,
                 optimizer='SGD', optimizer_param={'lr':0.01}, 
                 evaluate_sample_num_per_epoch=None, verbose=True):
        self.network = network
        self.verbose = verbose
        self.x_train = x_train
        self.t_train = t_train
        self.x_test = x_test
        self.t_test = t_test
        self.epochs = epochs
        self.batch_size = mini_batch_size
        self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch

        # optimizer: select the update rule by name
        optimizer_class_dict = {'sgd':SGD, 'momentum':Momentum, 'nesterov':Nesterov,
                                'adagrad':AdaGrad, 'rmsprop':RMSprop, 'adam':Adam}
        self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param)
        
        self.train_size = x_train.shape[0]
        self.iter_per_epoch = max(self.train_size / mini_batch_size, 1)
        self.max_iter = int(epochs * self.iter_per_epoch)
        self.current_iter = 0
        self.current_epoch = 0
        
        self.train_loss_list = []
        self.train_acc_list = []
        self.test_acc_list = []

    def train_step(self):
        batch_mask = np.random.choice(self.train_size, self.batch_size)
        x_batch = self.x_train[batch_mask]
        t_batch = self.t_train[batch_mask]
        
        grads = self.network.gradient(x_batch, t_batch)
        self.optimizer.update(self.network.params, grads)
        
        loss = self.network.loss(x_batch, t_batch)
        self.train_loss_list.append(loss)
        if self.verbose: print("train loss:" + str(loss))
        
        if self.current_iter % self.iter_per_epoch == 0:
            self.current_epoch += 1
            
            x_train_sample, t_train_sample = self.x_train, self.t_train
            x_test_sample, t_test_sample = self.x_test, self.t_test
            if self.evaluate_sample_num_per_epoch is not None:
                t = self.evaluate_sample_num_per_epoch
                x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t]
                x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t]
                
            train_acc = self.network.accuracy(x_train_sample, t_train_sample)
            test_acc = self.network.accuracy(x_test_sample, t_test_sample)
            self.train_acc_list.append(train_acc)
            self.test_acc_list.append(test_acc)

            if self.verbose: print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc) + " ===")
        self.current_iter += 1

    def train(self):
        for i in range(self.max_iter):
            self.train_step()

        test_acc = self.network.accuracy(self.x_test, self.t_test)

        if self.verbose:
            print("=============== Final Test Accuracy ===============")
            print("test acc:" + str(test_acc))

Training code:

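A minimal training script in the style of the book's train_deepnet.py, assuming the dataset.mnist loader from the book's repository and the DeepConvNet and Trainer classes above:

```python
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # allow importing files from the parent directory
from dataset.mnist import load_mnist

# flatten=False keeps the (1, 28, 28) layout the convolution layers expect
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

network = DeepConvNet()
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=20, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()

network.save_params("deep_convnet_params.pkl")  # save the trained weights
print("Saved network parameters!")
```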