Hand-rolled neural network: forward pass + ReLU + batch normalization + dropout + softmax classification

import numpy as np

def affine_forward(x, w, b):
    '''
    Inputs:
    :param x: A numpy array containing input data, of shape (N, D)
    :param w: A numpy array containing weights, of shape (D, M)
    :param b: A numpy array containing biases, of shape (M,)
    :return:
        out: output, of shape (N, M)
        cache: (x, w, b)
    '''
    # x may arrive as (N, d_1, ..., d_k); flatten each sample into a row vector
    reshaped_x = np.reshape(x, (x.shape[0], -1))
    out = reshaped_x.dot(w) + b
    cache = (x, w, b)
    return out, cache
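
# A quick sanity check (illustrative sizes, not from the original course code):
# affine_forward should map (N, D) inputs through (D, M) weights to (N, M) outputs.
x_demo = np.random.randn(4, 6)   # N=4 samples, D=6 features
w_demo = np.random.randn(6, 3)   # D=6 inputs, M=3 outputs
b_demo = np.zeros(3)
out_demo, _ = affine_forward(x_demo, w_demo, b_demo)
print(out_demo.shape)            # (4, 3)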

def relu_forward(x):

    out = np.maximum(0, x)  # compare every element of x against 0
    cache = x               # cache the incoming x matrix
    return out, cache

def affine_relu_forward(x, w, b):
    a, fc_cache = affine_forward(x, w, b)  # linear model
    out, relu_cache = relu_forward(a)      # activation function
    cache = (fc_cache, relu_cache)         # cached tuple: ((x, w, b), a)
    return out, cache

def batch_forward(x, gamma, beta, bn_param):
    mode = bn_param['mode']
    eps = bn_param.get('eps', 1e-5)
    momentum = bn_param.get('momentum', 0.9)

    N, D = x.shape
    running_mean = bn_param.get('running_mean', np.zeros(D, dtype=x.dtype))
    running_var = bn_param.get('running_var', np.zeros(D, dtype=x.dtype))

    out, cache = None, None
    if mode == 'train':
        sample_mean = np.mean(x, axis=0)  # per-column mean of the batch, shape (D,)
        sample_var = np.var(x, axis=0)    # per-column variance of the batch, shape (D,)
        x_hat = (x - sample_mean) / np.sqrt(sample_var + eps)

        out = gamma * x_hat + beta
        cache = (x, sample_mean, sample_var, x_hat, eps, gamma, beta)
        running_mean = momentum * running_mean + (1 - momentum) * sample_mean
        running_var = momentum * running_var + (1 - momentum) * sample_var
    elif mode == 'test':
        out = (x - running_mean) * gamma / np.sqrt(running_var + eps) + beta
    else:
        raise ValueError('Invalid forward batchnorm mode "%s"' % mode)

    # store the updated running means back into bn_param
    bn_param['running_mean'] = running_mean
    bn_param['running_var'] = running_var

    return out, cache
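
# A small usage sketch (my own illustrative numbers): in 'train' mode each output
# column should be roughly zero-mean/unit-variance, and the running statistics
# accumulate inside bn_param for later use in 'test' mode.
bn_param_demo = {'mode': 'train'}
x_demo = np.random.randn(100, 5) * 4.0 + 10.0   # deliberately shifted and scaled
gamma_demo, beta_demo = np.ones(5), np.zeros(5)
out_demo, _ = batch_forward(x_demo, gamma_demo, beta_demo, bn_param_demo)
print(out_demo.mean(axis=0), out_demo.std(axis=0))  # roughly 0 and 1 per column
bn_param_demo['mode'] = 'test'                       # test mode uses the running stats
out_test_demo, _ = batch_forward(x_demo, gamma_demo, beta_demo, bn_param_demo)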

def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
    '''
    Inputs:
    :param x: array of shape (N, D1), input to the affine layer
    :param w: array of shape (D1, D2), weights for the affine transform
    :param b: array of shape (D2,), biases for the affine transform
    :param gamma: array of shape (D2,), scale parameter for batch normalization
    :param beta: array of shape (D2,), shift parameter for batch normalization
    :param bn_param: dictionary of parameters for batch normalization
    :return:
        out: output from the ReLU, of shape (N, D2)
        cache: object to give to the backward pass
    '''
    a, fc_cache = affine_forward(x, w, b)
    a_bn, bn_cache = batch_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(a_bn)
    cache = (fc_cache, bn_cache, relu_cache)  # note: cache bn_cache, not bn_param
    return out, cache

def batchnorm_backward(dout, cache):
    '''
    :param dout: upstream derivative, of shape (N, D)
    :param cache: variables of intermediates from batch_forward
    :return: tuple
        dx: gradient with respect to input x, of shape (N, D)
        dgamma: gradient with respect to scale parameter gamma, of shape (D,)
        dbeta: gradient with respect to shift parameter beta, of shape (D,)
    '''
    x, mean, var, x_hat, eps, gamma, beta = cache
    N = x.shape[0]
    dgamma = np.sum(dout * x_hat, axis=0)  # line 5 of the batchnorm backward derivation
    dbeta = np.sum(dout * 1.0, axis=0)     # line 6 of the derivation
    dx_hat = dout * gamma
    dx_hat_numerator = dx_hat / np.sqrt(var + eps)
    dx_hat_denominator = np.sum(dx_hat * (x - mean), axis=0)
    dx_1 = dx_hat_numerator
    dvar = -0.5 * ((var + eps) ** (-1.5) * dx_hat_denominator)
    # note: var is also a function of mean
    dmean = -1.0 * np.sum(dx_hat_numerator, axis=0) + \
            dvar * np.mean(-2.0 * (x - mean), axis=0)
    dx_var = dvar * 2.0 / N * (x - mean)
    dx_mean = dmean * 1.0 / N
    # dmean and dvar have shape (D,), so broadcasting against (N, D) is safe here
    dx = dx_1 + dx_var + dx_mean

    return dx, dgamma, dbeta
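
# A finite-difference check of the analytic gradients above (my own sketch, not
# part of the original post). num_grad_demo is a hypothetical helper that
# perturbs one entry of a parameter at a time.
def num_grad_demo(f, p, h=1e-5):
    grad = np.zeros_like(p)
    it = np.nditer(p, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        old = p[idx]
        p[idx] = old + h; fp = f()
        p[idx] = old - h; fm = f()
        p[idx] = old
        grad[idx] = (fp - fm) / (2.0 * h)
        it.iternext()
    return grad

x_demo = np.random.randn(10, 4)
gamma_demo, beta_demo = np.random.randn(4), np.random.randn(4)
dout_demo = np.random.randn(10, 4)
_, cache_demo = batch_forward(x_demo, gamma_demo, beta_demo, {'mode': 'train'})
_, dgamma_demo, _ = batchnorm_backward(dout_demo, cache_demo)
# loss = sum(out * dout) has d(loss)/d(out) = dout, so its numerical gradient with
# respect to gamma should match the analytic dgamma above
f_demo = lambda: np.sum(batch_forward(x_demo, gamma_demo, beta_demo, {'mode': 'train'})[0] * dout_demo)
print(np.max(np.abs(dgamma_demo - num_grad_demo(f_demo, gamma_demo))))  # tiny, ~1e-8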

def dropout_forward(x, dropout_param):
    p, mode = dropout_param['p'], dropout_param['mode']
    if 'seed' in dropout_param:
        np.random.seed(dropout_param['seed'])

    mask = None
    out = None
    # training mode: inverted dropout, scale the kept units by 1/keep_prob
    if mode == 'train':
        keep_prob = 1 - p
        mask = (np.random.rand(*x.shape) < keep_prob) / keep_prob
        out = mask * x
    # test mode: identity, no rescaling needed
    elif mode == 'test':
        out = x

    cache = (dropout_param, mask)
    out = out.astype(x.dtype, copy=False)

    return out, cache
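
# A quick check of the inverted-dropout scaling (illustrative, not from the
# original post): because kept units are divided by keep_prob at train time,
# the expected activation is unchanged and test time needs no rescaling.
dp_demo = {'mode': 'train', 'p': 0.3, 'seed': 0}
x_demo = np.ones((5000, 10))
out_demo, _ = dropout_forward(x_demo, dp_demo)
print(out_demo.mean())            # close to 1.0
dp_demo['mode'] = 'test'
out_test_demo, _ = dropout_forward(x_demo, dp_demo)  # identity at test time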

def affine_bn_relu_backward(dout, cache):
    '''
    Backward pass for the affine-batchnorm-relu convenience layer.
    :param dout: upstream derivative
    :param cache: caches saved by affine_bn_relu_forward
    :return: dx, dw, db, dgamma, dbeta
    '''
    fc_cache, bn_cache, relu_cache = cache
    da_bn = relu_backward(dout, relu_cache)                  # ReLU layer
    da, dgamma, dbeta = batchnorm_backward(da_bn, bn_cache)  # BN layer comes after ReLU on the backward pass
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db, dgamma, dbeta

def dropout_backward(dout, cache):
    dropout_param, mask = cache
    mode = dropout_param['mode']

    dx = None
    if mode == 'train':
        dx = mask * dout
    elif mode == 'test':
        dx = dout
    return dx

def softmax_loss(z, y):
    '''
    :param z: input data, of shape (N, C) where z[i, j] is the score for
        the jth class for the ith input
    :param y: vector of labels, of shape (N,) where y[i] is the label for
        x[i] and 0 <= y[i] < C
    :return: tuple
        loss: scalar giving the loss
        dz: gradient of the loss with respect to z
    '''
    probs = np.exp(z - np.max(z, axis=1, keepdims=True))
    probs /= np.sum(probs, axis=1, keepdims=True)
    N = z.shape[0]
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    dz = probs.copy()  # work on a copy of probs
    dz[np.arange(N), y] -= 1
    dz /= N
    return loss, dz
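
# A sanity check on the softmax loss (my own illustrative numbers): with
# near-uniform random scores, the expected loss is about ln(C).
z_demo = 0.001 * np.random.randn(50, 10)   # N=50, C=10
y_demo = np.random.randint(10, size=50)
loss_demo, dz_demo = softmax_loss(z_demo, y_demo)
print(loss_demo, np.log(10))               # both roughly 2.302
print(dz_demo.shape)                       # (50, 10), same shape as z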

def affine_backward(dout, cache):
    '''
    Computes the backward pass for an affine layer.

    :param dout: upstream derivative, of shape (N, M)
    :param cache: tuple of:
            z: input data, of shape (N, d_1, d_2, ..., d_k)
            w: weights, of shape (D, M)
            b: biases, of shape (M,)
    :return:
            dz: gradient with respect to z, of shape (N, d_1, d_2, ..., d_k)
            dw: gradient with respect to w, of shape (D, M)
            db: gradient with respect to b, of shape (M,)

    The forward pass was out = x.dot(w) + b, with shapes
    (N, M) = (N, D)·(D, M) + (M,)
    '''
    z, w, b = cache
    reshaped_x = np.reshape(z, (z.shape[0], -1))
    dz = np.reshape(dout.dot(w.T), z.shape)  # (N, M)·(M, D) = (N, D), reshaped back to z.shape
    dw = reshaped_x.T.dot(dout)
    db = np.sum(dout, axis=0)

    return dz, dw, db


def relu_backward(dout, cache):
    '''
    :param dout: upstream derivative
    :param cache: the input x saved by relu_forward
    :return: dx, gradient with respect to x
    '''
    x = cache
    dx = (x > 0) * dout
    # wherever an element of x was positive, the matching element of dout
    # passes through; everywhere else the gradient is zero
    return dx

def affine_relu_backward(dout, cache):
    fc_cache, relu_cache = cache           # fc_cache = (x, w, b)
    da = relu_backward(dout, relu_cache)   # da = (x > 0) * dout
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db
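
# A shape round-trip through the combined forward/backward pair (illustrative):
x_demo = np.random.randn(4, 6)
w_demo, b_demo = np.random.randn(6, 3), np.zeros(3)
out_demo, cache_demo = affine_relu_forward(x_demo, w_demo, b_demo)
dx_demo, dw_demo, db_demo = affine_relu_backward(np.random.randn(*out_demo.shape), cache_demo)
print(dx_demo.shape, dw_demo.shape, db_demo.shape)  # (4, 6) (6, 3) (3,)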

# class TwoLayerNet(object):  # our 2-layer fully connected neural network
#     '''
#     First, we need to initialize our neural network.
#     After all, when data first flows into the network, our parameters (W, b)
#     cannot be empty, nor should they be too large or too small: parameter
#     initialization matters a great deal and has a huge impact on training,
#     yet there is still no consensus on how to initialize "properly" --
#     many papers still discuss this topic.
#     '''
#     def __init__(self
#                  , input_dim=3*32*32     # dimensionality of each sample image
#                  , hidden_dim=100        # number of neurons in the hidden layer
#                  , num_classes=10        # number of image classes
#                  , weight_scale=1e-3):   # scale (standard deviation) of the weight initialization
#         '''
#         All learnable parameters (W, b) are stored in the self.params
#         dictionary, where every value is a numpy array.
#         '''
#         self.param = {}
#         # 3*32*32, 100
#         self.param['W1'] = weight_scale * np.random.randn(input_dim,
#                                                           hidden_dim)
#         self.param['b1'] = np.zeros(hidden_dim)
#         self.param['W2'] = weight_scale * np.random.randn(hidden_dim,
#                                                           num_classes)
#         self.param['b2'] = np.zeros(num_classes)

    # Next, defining one last loss function completes the network.
    # def loss(self, X, y):
    #     '''
    #     The input X is a multi-dimensional array of shape (N, 3, 32, 32),
    #     and y holds the correct labels for X, with shape (N,).
    #     Our loss function should output a scalar loss and a grads dictionary
    #     holding the gradients of the loss with respect to the hidden- and
    #     output-layer parameters (W, b).
    #     :param X:
    #     :param y:
    #     :return:
    #     '''
    #     loss, grads = 0, {}
    #     # forward pass of X through the hidden layer and the output layer:
    #     h1_out, h1_cache = affine_relu_forward(X, self.param['W1'], self.param['b1'])
    #     scores, out_cache = affine_forward(h1_out
    #                                        , self.param['W2']
    #                                        , self.param['b2'])
    #     # after the output layer, combine with the labels y to get the loss
    #     # and its gradient at the output layer:
    #     loss, dout = softmax_loss(scores, y)
    #
    #     # backpropagate the loss gradient through the output and hidden layers:
    #     dout, dw2, db2 = affine_backward(dout, out_cache)
    #     grads['W2'] = dw2
    #     grads['b2'] = db2
    #     _, dw1, db1 = affine_relu_backward(dout, h1_cache)
    #     grads['W1'] = dw1
    #     grads['b1'] = db1
    #
    #     '''
    #     Note that the data gradient dout only serves as a guide along the way
    #     and is discarded in the end; we only keep the gradients of the loss
    #     with respect to the parameters, stored in the grads dictionary.
    #     '''
    #     # loss += 0.5 * self.reg * (np.sum(self.param['W1']**2) +
    #     #                           np.sum(self.param['W2']**2))
    #     # dW2 += self.reg * self.param['W2']
    #     # dW1 += self.reg * self.param['W1']
    #     return loss, grads

class FullyConnectNet(object):
    '''
    A fully connected neural network with an arbitrary number of hidden layers
    and neurons, using ReLU activations and a softmax loss, with optional
    dropout and batch normalization. For an L-layer network the architecture is:

    {affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax

    where [batch norm] and [dropout] are optional, and the {...} block is
    repeated L - 1 times, once per hidden layer.

    Consistent with the TwoLayerNet() class defined above, all learnable
    parameters live in the self.params dictionary and are ultimately learned
    by the Solver() class.
    '''
    # Step 1: initialize our FullyConnectNet() class:
    def __init__(self,
                 hidden_dims,            # list whose length is the number of hidden layers and whose values are their sizes
                 input_dim=3*32*32,      # default 3072 input neurons, matching the CIFAR dataset
                 num_classes=10,
                 dropout=0,              # dropout disabled by default
                 use_batchnorm=False,    # batch normalization disabled by default
                 reg=0.0,                # no L2 regularization by default; a scalar giving the regularization strength
                 weight_scale=1e-2,      # default 0.01, the standard deviation for weight initialization
                 dtype=np.float64,       # default np.float64; all computation should be at this precision
                 seed=None):             # no random seed by default; if given, it is passed to the dropout layers
        # store the configuration on the instance so loss() can use it later
        self.use_batchnorm = use_batchnorm
        self.use_dropout = dropout > 0
        self.reg = reg
        self.num_layers = 1 + len(hidden_dims)
        self.dtype = dtype
        self.params = {}  # empty dict holding the learnable parameters
        # define the parameters of every hidden layer in self.params
        in_dim = input_dim
        for i, h_dim in enumerate(hidden_dims):  # e.g. (0, h1), (1, h2)
            self.params['W%d' % (i+1,)] = weight_scale * np.random.randn(in_dim, h_dim)
            self.params['b%d' % (i+1,)] = np.zeros((h_dim,))
            if use_batchnorm:
                self.params['gamma%d' % (i+1,)] = np.ones((h_dim,))
                self.params['beta%d' % (i+1,)] = np.zeros((h_dim,))
            in_dim = h_dim  # the width of this layer becomes the input width of the next

        # define the output layer parameters in self.params
        self.params['W%d' % (self.num_layers,)] = weight_scale * np.random.randn(hidden_dims[-1],
                                                                                 num_classes)
        self.params['b%d' % self.num_layers] = np.zeros(num_classes)

        '''
        When dropout is enabled, we pass the same dropout parameter dictionary
        self.dropout_param to every layer, so each layer knows the drop
        probability p and the current mode (train or test) of the network.
        '''
        self.dropout_param = {}  # dropout parameter dictionary
        if self.use_dropout:
            self.dropout_param = {'mode': 'train', 'p': dropout}
        if seed is not None:
            self.dropout_param['seed'] = seed
        '''
        When batch normalization is enabled, we keep a list of BN parameter
        dictionaries to track the running mean and variance of each layer.
        self.bn_params[0] holds the parameters of the first BN layer on the
        forward pass, self.bn_params[1] those of the second, and so on.
        '''
        self.bn_params = []
        if self.use_batchnorm:
            self.bn_params = [{'mode': 'train'} for i in range(self.num_layers - 1)]
            # the number of elements in self.bn_params equals the number of hidden layers

        # finally, cast all learnable parameters to the requested precision:
        for k, v in self.params.items():
            self.params[k] = v.astype(dtype)
    # Step 2: define our loss function
    def loss(self, X, y=None):
        '''
        As with TwoLayerNet:
        the input X is a multi-dimensional array of shape (N, 3*32*32),
        and y is an array of labels corresponding to X, of shape (N,).
        In training mode, the loss function outputs a scalar loss and a grads
        dictionary holding the gradients of the loss with respect to the
        hidden- and output-layer parameters (W, b, gamma, beta).
        In test mode, the loss function only needs to return the output-layer
        scores, i.e. the predictions y_pred.
        :param X:
        :param y:
        :return:
        '''
        # cast the input data matrix X to the configured precision
        X = X.astype(self.dtype)
        # set the mode to test or train depending on whether labels y are given
        mode = 'test' if y is None else 'train'

        '''
        Once we know which mode the network is in, we can set the mode of the
        dropout parameter dictionary and of every BN parameter dictionary,
        because their behavior differs between the two modes.
        '''
        if self.dropout_param is not None:
            self.dropout_param['mode'] = mode

        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode
        scores = None

        '''
        %% Forward pass %%
        If dropout is enabled, the dropout parameter dictionary
        self.dropout_param must be passed to every dropout layer.
        If batch normalization is enabled, self.bn_params[i] holds the BN
        parameters of the (i+1)-th layer on the forward pass.
        '''
        fc_mix_cache = {}       # cache dictionary for every layer's forward pass
        if self.use_dropout:    # if dropout is enabled, set up its own cache dictionary
            dp_cache = {}

        # loop over the hidden layers, passing the data out along and saving each layer's cache
        out = X
        for i in range(self.num_layers - 1):    # loop over every hidden layer
            w, b = self.params['W%d' % (i+1,)], self.params['b%d' % (i+1,)]
            if self.use_batchnorm:
                gamma = self.params['gamma%d' % (i+1,)]
                beta = self.params['beta%d' % (i+1,)]
                out, fc_mix_cache[i] = affine_bn_relu_forward(out, w, b, gamma, beta, self.bn_params[i])
            else:
                out, fc_mix_cache[i] = affine_relu_forward(out, w, b)
            if self.use_dropout:
                out, dp_cache[i] = dropout_forward(out, self.dropout_param)
        # the final output layer
        w = self.params['W%d' % (self.num_layers,)]
        b = self.params['b%d' % (self.num_layers,)]

        out, out_cache = affine_forward(out, w, b)
        scores = out
        '''
        In the loop over hidden layers above, the variable out updates itself
        on every iteration; fc_mix_cache stores each hidden layer's cache in
        order (possibly including the BN cache); dp_cache separately stores
        each dropout layer's drop probability and mask; and out_cache holds
        the cache of the output layer.
        '''
        # now let the loss function distinguish between the two modes
        if mode == 'test':
            return scores
        '''
        %% Backward pass %%
        In training mode, compute the loss and, via backpropagation,
        the gradients of the model parameters.
        '''
        loss, grads = 0.0, {}
        loss, dout = softmax_loss(scores, y)
        loss += 0.5 * self.reg * np.sum(self.params['W%d' % (self.num_layers,)]**2)

        dout, dw, db = affine_backward(dout, out_cache)
        grads['W%d' % (self.num_layers,)] = dw + self.reg * self.params['W%d' % (self.num_layers,)]
        grads['b%d' % (self.num_layers,)] = db
        # at every hidden layer, backpropagate the gradient, updating the
        # grads dictionary and accumulating the regularization loss
        for i in range(self.num_layers - 1):
            ri = self.num_layers - 2 - i  # index of the (ri+1)-th hidden layer, counted from the back
            loss += 0.5 * self.reg * np.sum(self.params['W%d' % (ri+1,)]**2)
            if self.use_dropout:
                dout = dropout_backward(dout, dp_cache[ri])
            if self.use_batchnorm:
                dout, dw, db, dgamma, dbeta = affine_bn_relu_backward(dout, fc_mix_cache[ri])
                grads['gamma%d' % (ri+1,)] = dgamma
                grads['beta%d' % (ri+1,)] = dbeta
            else:
                dout, dw, db = affine_relu_backward(dout, fc_mix_cache[ri])
            grads['W%d' % (ri+1,)] = dw + self.reg * self.params['W%d' % (ri+1,)]
            grads['b%d' % (ri+1,)] = db
        return loss, grads
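
# A quick look at what the constructor above creates (hypothetical sizes, just
# to illustrate the naming scheme of self.params):
net_demo = FullyConnectNet(hidden_dims=[100, 50], input_dim=8, num_classes=10,
                           use_batchnorm=True)
for k_demo in sorted(net_demo.params):
    print(k_demo, net_demo.params[k_demo].shape)
# W1 (8, 100)   b1 (100,)  gamma1/beta1 (100,)
# W2 (100, 50)  b2 (50,)   gamma2/beta2 (50,)
# W3 (50, 10)   b3 (10,)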

import optim
class Solver(object):
    '''
    Our Solver class trains a model built on our network framework, the
    FullyConnectNet() class, on the training and validation splits of the
    data source, periodically checking accuracy to guard against overfitting.

    The class defines five methods including __init__(), of which train() is
    the most important: calling it starts the optimization procedure for the
    network model.

    After training, the model parameters that did best on the validation set
    are stored in model.params. In addition, the history of loss values is
    kept in solver.loss_history, while solver.train_acc_history and
    solver.val_acc_history record the model's accuracy on the training and
    validation sets at every epoch.
    '''

    # Step 1: initialize our Solver() class
    def __init__(self, model, data, **kwargs):

        # store everything on the instance so train() and friends can use it later
        self.model = model
        self.X_train = data['X_train']  # training sample images
        self.y_train = data['y_train']
        self.X_val, self.y_val = data['X_val'], data['y_val']

        '''
        The following are optional arguments, popped one by one from the
        kwargs argument dictionary:
        '''
        self.update_rule = kwargs.pop('update_rule', 'sgd_momentum')
        self.optim_config = kwargs.pop('optim_config', {})
        self.lr_decay = kwargs.pop('lr_decay', 1.0)
        self.batch_size = kwargs.pop('batch_size', 100)
        self.num_epochs = kwargs.pop('num_epochs', 10)
        self.print_every = kwargs.pop('print_every', 10)
        self.verbose = kwargs.pop('verbose', True)
        '''
        Error handling: if kwargs contains anything besides the keys above, raise.
        '''
        if len(kwargs) > 0:
            extra = ', '.join('"%s"' % k for k in kwargs.keys())
            raise ValueError('Unrecognized arguments %s' % extra)

        '''
        Error handling: if the requested update rule does not exist in optim, raise.
        Then replace self.update_rule with the actual optimization function, i.e.
        self.update_rule(w, dw, config) -> (next_w, config)
        '''
        if not hasattr(optim, self.update_rule):
            raise ValueError('Invalid update_rule "%s"' % self.update_rule)
        self.update_rule = getattr(optim, self.update_rule)
        # run _reset()
        self._reset()

    # Step 2: define _reset(), which is only called from __init__()
    def _reset(self):
        '''
        Reset some bookkeeping variables used during optimization.
        :return:
        '''
        self.epoch = 0
        self.best_val_acc = 0
        self.best_params = {}
        self.loss_history = []
        self.train_acc_history = []
        self.val_acc_history = []
        # make a deep copy of the optim_config for each parameter
        self.optim_configs = {}
        for p in self.model.params:
            d = {k: v for k, v in self.optim_config.items()}
            self.optim_configs[p] = d
        '''
        The loop above builds a per-parameter optimization dictionary
        self.optim_configs, of the form
        {'b1': {'learning_rate': 0.0005}, 'W1': {'learning_rate': 0.0005}, ...},
        giving every model parameter the same initial hyperparameters.
        '''

    # Step 3: define _step(), which is only called from train()
    def _step(self):
        '''
        In training mode, run one forward and backward pass over a batch of
        sample images and update the model parameters once.
        :return:
        '''
        num_train = self.X_train.shape[0]   # total number of training samples
        batch_mask = np.random.choice(num_train, self.batch_size)  # random minibatch of sample indices
        X_batch = self.X_train[batch_mask]
        y_batch = self.y_train[batch_mask]
        # run the batch through the network to get the loss and the gradient dictionary
        loss, grads = self.model.loss(X_batch, y_batch)
        self.loss_history.append(loss)  # record this batch's loss

        # perform one parameter update
        for p, w in self.model.params.items():
            dw = grads[p]                    # gradient of parameter p
            config = self.optim_configs[p]   # optimization hyperparameters of parameter p
            next_w, next_config = self.update_rule(w, dw, config)  # optimization algorithm
            self.model.params[p] = next_w
            self.optim_configs[p] = next_config  # new hyperparameters (e.g. momentum velocity) replace the old

    # Step 4: define check_accuracy(), which is only called from train()
    def check_accuracy(self, X, y, num_samples=None, batch_size=100):
        '''
        Compute the label accuracy on the given image samples.
        :param X:
        :param y:
        :param num_samples:
        :param batch_size:
        :return:
        '''
        N = X.shape[0]
        if num_samples is not None and N > num_samples:
            mask = np.random.choice(N, num_samples)
            N = num_samples
            X = X[mask]
            y = y[mask]

        # compute predictions in batches
        num_batches = N // batch_size
        if N % batch_size != 0:
            num_batches += 1
        y_pred = []
        for i in range(num_batches):
            start = i * batch_size
            end = (i + 1) * batch_size
            scores = self.model.loss(X[start:end])
            y_pred.append(np.argmax(scores, axis=1))
        y_pred = np.hstack(y_pred)
        # horizontal stacking turns [[1, 8, 9, ...], [6, 5, 7, ...]]
        # into [1, 8, 9, ..., 6, 5, 7, ...]
        acc = np.mean(y_pred == y)

        return acc

    # Step 5: define our most important method, train()
    def train(self):
        '''
        First work out the total number of iterations, num_iterations.
        :return:
        '''
        num_train = self.X_train.shape[0]
        iterations_per_epoch = max(num_train // self.batch_size, 1)  # iterations per epoch
        num_iterations = self.num_epochs * iterations_per_epoch
        '''
        Start the training loop!
        '''
        for t in range(num_iterations):
            self._step()
            '''
            The call above completes one iteration of the network: the model
            parameters have been updated once and a new loss value has been
            appended to self.loss_history.
            '''
            if self.verbose and t % self.print_every == 0:
                print('(Iteration %d / %d) loss: %f' % (t + 1, num_iterations, self.loss_history[-1]))
            '''
            At the end of every epoch, increment the epoch counter and decay
            the learning rate.
            '''
            epoch_end = (t + 1) % iterations_per_epoch == 0
            if epoch_end:
                self.epoch += 1
                for k in self.optim_configs:
                    self.optim_configs[k]['learning_rate'] *= self.lr_decay  # from the second epoch on, multiply the learning rate by the decay factor

            # check train and val accuracy on the first iteration, the last
            # iteration, and at the end of each epoch
            first_it = (t == 0)
            last_it = (t == num_iterations - 1)
            if first_it or last_it or epoch_end:
                train_acc = self.check_accuracy(self.X_train, self.y_train, num_samples=1000)
                val_acc = self.check_accuracy(self.X_val, self.y_val, num_samples=1000)
                self.train_acc_history.append(train_acc)
                self.val_acc_history.append(val_acc)

                if self.verbose:
                    print('Epoch %d / %d train acc: %f; val_acc: %f' % (
                        self.epoch, self.num_epochs, train_acc, val_acc
                    ))

                if val_acc > self.best_val_acc:
                    self.best_val_acc = val_acc
                    self.best_params = {}
                    for k, v in self.model.params.items():
                        self.best_params[k] = v.copy()

        '''
        End of the training loop: keep the best parameters seen on the
        validation set.
        '''
        self.model.params = self.best_params

import cifar_read_data_sets

# Load the raw CIFAR-10 data.
cifar10_dir = '../cifar/cifar-10-batches-py'
X_train, y_train, X_test, y_test = cifar_read_data_sets.load_CIFAR10(cifar10_dir)
X_train = np.reshape(X_train, [X_train.shape[0], -1])
X_test = np.reshape(X_test, [X_test.shape[0], -1])
data = {'X_train': X_train,
        'y_train': y_train,
        'X_val': X_test,
        'y_val': y_test}

model = FullyConnectNet(hidden_dims=[100, ], reg=0.1)
solver = Solver(model, data,
                update_rule='sgd_momentum',
                optim_config={'learning_rate': 1e-3},
                lr_decay=0.95,   # decay rate of the learning rate
                num_epochs=10,
                batch_size=100,
                print_every=100)
solver.train()

The optim module (optim.py) used above:

import numpy as np

def sgd_momentum(w, dw, config=None):
    '''
    Performs stochastic gradient descent with momentum.
    config format:
    - learning_rate: scalar learning rate
    - momentum: scalar between 0 and 1; if 0, this reduces to vanilla SGD
    - velocity: a numpy array of the same shape as w and dw, used to store a
      moving average of the gradients
    :param w:
    :param dw:
    :param config:
    :return:
    '''
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('momentum', 0.9)
    v = config.get('velocity', np.zeros_like(w))
    v = config['momentum'] * v - config['learning_rate'] * dw
    next_w = w + v  # note: update w, not dw
    config['velocity'] = v

    return next_w, config
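
# A two-step sketch (my own illustrative numbers) showing how the velocity
# carries over between updates through the returned config:
w_demo = np.zeros(3)
dw_demo = np.ones(3)
config_demo = {'learning_rate': 0.1, 'momentum': 0.9}
w_demo, config_demo = sgd_momentum(w_demo, dw_demo, config_demo)  # v = -0.1,  w = -0.1
w_demo, config_demo = sgd_momentum(w_demo, dw_demo, config_demo)  # v = -0.19, w = -0.29
print(w_demo, config_demo['velocity'])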

This code was taken from a NetEase Cloud Classroom course, purely for learning purposes; if this infringes any rights, please let me know.
