Implementing a Neural Network in Python

# P 141
import numpy as np 

class NeuralNetwork:
    def __init__(self,layers,alpha=0.1):
        # initialize the list of weights matrices, then store the 
        # network architecture and learning rate 
        self.W = []
        self.layers = layers
        self.alpha = alpha
    
        # start looping from the index of the first layer but 
        # stop before we reach the last two layers 
        for i in np.arange(0, len(layers) - 2):
            # randomly initialize a weight matrix connecting the 
            # number of nodes in each respective layer together,
            # adding an extra node for the bias
            w = np.random.randn(layers[i] + 1,layers[i+1] + 1)
            self.W.append(w / np.sqrt(layers[i]))

        # the last two layers are a special case where the input 
        # connections need a bias term but the output does not
        w = np.random.randn(layers[-2] + 1,layers[-1])
        self.W.append(w / np.sqrt(layers[-2]))

    def __repr__(self):
        # construct and return a string that represents the network
        # architecture
        return "NeuralNetwork: {}".format(\
                        "-".join(str(l) for l in self.layers))

    def sigmoid(self,x):
        # compute and return the sigmoid activation value for a 
        # given input value 
        return 1.0 / (1 + np.exp(-x))
    
    def sigmoid_deriv(self,x):
        # compute the derivative of the sigmoid function ASSUMING
        # that 'x' has already been passed through the 'sigmoid'
        # function 
        return x * (1 - x)

    def fit(self,X,y,epochs=1000,displayUpdate=100):
        # insert a column of 1's as the last entry in the feature
        # matrix -- this little trick allows us to treat the bias
        # as a trainable parameter within the weight matrix 
        X = np.c_[X,np.ones((X.shape[0]))]

        # loop over the desired number of epochs 
        for epoch in np.arange(0,epochs):
            # loop over each individual data point and train 
            # our network on it 
            for (x,target) in zip(X,y):
                self.fit_partial(x,target)

            # check to see if we should display a training update 
            if epoch == 0 or (epoch+1) % displayUpdate == 0:
                loss = self.calculate_loss(X,y)
                print("[INFO] epoch={},loss={:.7f}".format(\
                        epoch + 1,loss))

    def fit_partial(self,x,y):
        # construct our list of output activations for each layer
        # as our data point flows through the network; the first
        # activation is a special case -- it's just the input 
        # feature vector itself 
        A = [np.atleast_2d(x)]

        # FEEDFORWARD 
        # loop over the layers in the network 
        for layer in np.arange(0,len(self.W)):
            # feedforward the activation at the current layer by 
            # taking the dot product between the activation and 
            # the weight matrix -- this is called the "net input"
            # to the current layer 
            net = A[layer].dot(self.W[layer])

            # computing the "net output" is simply applying our 
            # nonlinear activation function to the net input 
            out = self.sigmoid(net)

            # once we have the net output, add it to our list of 
            # activations 
            A.append(out)
        
        # BACKPROPAGATION
        # the first phase of backpropagation is to compute the 
        # difference between our *prediction* (the final output
        # activation in the activations list) and the true target
        # value 
        error = A[-1] - y

        # from here, we need to apply the chain rule and build our 
        # list of deltas 'D'; the first entry in the delta is 
        # simply the error of the output layer times the derivative 
        # of our activation function for the output value 
        D = [error * self.sigmoid_deriv(A[-1])]

        # once you understand the chain rule it becomes super easy
        # to implement with a 'for' loop -- simply loop over the
        # layers in reverse order (ignoring the last two since we
        # have already taken them into account)
        for layer in np.arange(len(A) - 2, 0, -1):
            # the delta for the current layer is equal to the delta
            # of the *previous layer* dotted with the weight matrix
            # of the current layer, followed by multiplying the delta
            # by the derivative of the nonlinear activation function
            # for the activations of the current layer
            delta = D[-1].dot(self.W[layer].T)
            delta = delta * self.sigmoid_deriv(A[layer])
            D.append(delta)

        # since we looped over our layers in reverse order we need to
        # reverse the deltas
        D = D[::-1]

        # WEIGHT UPDATE PHASE
        # loop over the layers
        for layer in np.arange(0, len(self.W)):
            # update our weights by taking the dot product of the layer
            # activations with their respective deltas, then multiplying
            # this value by some small learning rate and adding to our
            # weight matrix -- this is where the actual "learning" takes
            # place
            self.W[layer] += -self.alpha * A[layer].T.dot(D[layer])
    
    def predict(self,X,addBias=True):
        # initialize the output prediction as the input features -- this 
        # value will be (forward) propagated through the network to 
        # obtain the final prediction
        p = np.atleast_2d(X)

        # check to see if the bias column should be added 
        if addBias:
            # insert a column of 1's as the last entry in the feature 
            # matrix (bias)
            p = np.c_[p,np.ones((p.shape[0]))]
        
        # loop over our layers in the network 
        for layer in np.arange(0,len(self.W)):
            # computing the output prediction is as simple as taking 
            # the dot product between the current activation value 'p'
            # and the weight matrix associated with the current layer,
            # then passing this value through a nonlinear activation
            # function 
            p = self.sigmoid(np.dot(p,self.W[layer]))

        # return the predicted value
        return p 
    
    def calculate_loss(self,X,targets):
        # make predictions for the input data points then compute
        # the loss
        targets = np.atleast_2d(targets)
        predictions = self.predict(X,addBias=False)
        loss = 0.5 * np.sum((predictions - targets) ** 2)

        # return the loss
        return loss 
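
As a quick sanity check, the class can be trained on the XOR problem. The snippet below is a minimal usage sketch placed after the class definition in the same file (it reuses the numpy import at the top); the 2-2-1 architecture, the learning rate of 0.5, the 20000 epochs, and the 0.5 decision threshold are assumed, illustrative choices rather than values prescribed by the listing above.

# illustrative usage sketch: train a 2-2-1 network on XOR
# (the hyperparameters below are assumptions, not from the listing above)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

nn = NeuralNetwork([2, 2, 1], alpha=0.5)
nn.fit(X, y, epochs=20000, displayUpdate=1000)

# show the raw sigmoid output and the thresholded prediction
for (x, target) in zip(X, y):
    pred = nn.predict(x)[0][0]
    step = 1 if pred > 0.5 else 0
    print("[INFO] data={}, ground-truth={}, pred={:.4f}, step={}".format(
        x, target[0], pred, step))

If training converges, the raw sigmoid outputs should sit close to 0 or 1 for the respective XOR targets; the thresholded step value just makes the result easier to read.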

 
