斯坦福机器学习课程神经网络作业的Python实现

最新推荐文章于 2022-11-20 20:06:20 发布
ericzha
最新推荐文章于 2022-11-20 20:06:20 发布
阅读量1.1k
点赞数 1
分类专栏：机器学习文章标签： python 机器学习神经网络斯坦福大学
本文链接：https://blog.csdn.net/ericzha/article/details/62423934
版权
机器学习专栏收录该内容
1 篇文章 0 订阅
订阅专栏
最近在学习斯坦福大学的机器学习课程，自己把ex4的作业用Python实现了一下，分享出来供大家参考学习。欢迎提出改进建议或者一起讨论。
使用类封装
可以由参数定义多层hidden layers
使用scipy的minimize函数计算theta （尝试过多种算法，不解如何调整参数，使用BFGS算法返回MemoryError)
如果自己实现循环不适用minimize函数是否效率会比较低？这个还没有做测试，如果谁能测试一下希望告知
# -*- coding: utf-8 -*-
import numpy as np
import scipy.io as sio
import scipy.optimize as op
import threading
import datetime

class NerualNetwork():
    @property
    def ThetaSet(self):
        return self.__theta_set
    
    @property
    def ResultMsg(self):
        return self.__resultMsg

    def __init__(self):
        np.random.seed(1)
        self.trainingState = "Not Started"
        self.__inputs = np.array([])
        self.__outputs = np.array([])
        self.__num__labels = np.array([])
        self.__theta_set = []
        self.__regular = 0
        self.__iterations = 0
        self.__minimize_method = 'L-BFGS-B'
        self.__minimize_disp = False
        self.__labels = []
        self.__resultMsg = None

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def sigmoidGradient(self, z):
        ret = z * (1 - z)
        return ret

    def unrollTheta(self, theta_set):
        v = []
        for t in theta_set:
            v.extend(t.flatten().tolist())
        return np.array(v)

    def obtainTheta(self, theta):
        theta_set = []
        pos1 = 0
        pos2 = 0
        for t in self.__theta_set:
            pos2 += t.size
            tmp = theta[pos1 : pos2]
            tmp = tmp.reshape(t.shape)
            theta_set.append(tmp)
            pos1 = pos2
        return theta_set

    def costFunction(self, nn_params, inputs, outputs):
        # Set up parameters
        theta_set = self.obtainTheta(nn_params)
        m = np.size(inputs, 0)
        J = 0
        layers = len(theta_set)
        theta_grad = []
        for i in range(layers):
            theta_grad.append(np.zeros(theta_set[i].shape))

        # Forward propagation to calculate the cost
        aval = []
        aval.append(inputs)
        
        for i in range(layers):
            a = self.sigmoid(aval[-1].dot(theta_set[i].T))
            a = np.column_stack((np.ones((m, 1)), a))
            aval.append(a)

        
        # Calculate the cost
        jval = np.sum((-1) * outputs * np.log(aval[-1][:, 1:]) - (1 - outputs) * np.log(1 - aval[-1][:, 1:])) / m
        rval = 0
        for x in theta_set:
            rval += np.sum(np.power(x[:,1:],2))
        
        rval = rval * self.__regular/(2*m)
        J = jval + rval

        # Implement the backward propagation to compute the gradients
        errors = []
        deltas = []
        # errors in the backward order, i.e the last error is calculated firstly.
        for i in range(layers):
            # The last error = the last predicted values from forward propagation- outputs
            if i == 0:
                errors.append(aval[-1][:,1:] - outputs)
                deltas.append(errors[-1] * self.sigmoidGradient(aval[layers][:,1:]))
            else:
                errors.append(errors[-1].dot(theta_set[layers-i][:,1:]))
                deltas.append(errors[-1] * self.sigmoidGradient(aval[layers-i][:,1:]))

        # Note that the forward and backword lists are in reversed orders
        for i in range(layers):
            theta_grad[i] = deltas[layers-1-i].T.dot(aval[i])

        theta_regular = []
        for t in theta_set:
            theta_regular.append(np.column_stack((np.zeros((np.size(t,0),1)),t[:,1:])))
        
        for i in range(layers):
            theta_grad[i] = (1/m) * theta_grad[i] + (self.__regular * theta_regular[i])/m

        grad = self.unrollTheta(theta_grad)
        return (J, grad)

    def minimize(self, initial_theta):
        
        # See more options from https://docs.scipy.org/doc/scipy/reference/optimize.html

        opts = {'maxiter':self.__iterations, 'disp':self.__minimize_disp}
        if self.__minimize_method == "TNC":
            opts['maxCGit'] = 0
            opts['stepmx'] = 500
        elif self.__minimize_method == 'L-BFGS-B':
            opts['eps'] = 1e-8
        else:
            pass

        Result = op.minimize(fun = self.costFunction,
                     x0 = initial_theta,
                     args = (self.__inputs, self.__outputs),
                     method = self.__minimize_method,
                     jac = True,
                     options= opts)

        optimal_theta = Result.x;

        self.__resultMsg = "Iterations = " + str(Result.nit)
        self.__resultMsg += "\n" + str(Result.message)
        
        self.__theta_set = self.obtainTheta(optimal_theta);


    def train(self, training_set_inputs, training_set_outputs,
              number_of_labels, labels, theta_set, regular,
              training_iterations, minimize_method, minimize_disp):
        self.__inputs = training_set_inputs
        self.__outputs = training_set_outputs
        self.__num__labels = number_of_labels
        self.__labels = labels
        self.__theta_set = theta_set
        self.__regular = regular
        self.__iterations = training_iterations
        self.__minimize_method = minimize_method
        self.__minimize_disp = minimize_disp

        if (self.__checkInput()):
            self.trainingState = "Training"
            print(".....Initialize theta with random weights between(-1, 1)")
            for i in range(len(self.__theta_set)):
                rd = 2 * np.random.random(self.__theta_set[i].shape) - 1
                self.__theta_set[i] = rd
            initial_theta = self.unrollTheta(self.__theta_set)
            print(".....Minimize the cost function in a new thread.")
            # This line is for test
            J,g = self.costFunction(initial_theta, self.__inputs, self.__outputs)
            print("Cost = %f" % J)
            t = threading.Thread(target=self.minimize, args=(initial_theta,))
            t.setDaemon(True)
            t.start()
            t.join()
            self.trainingState = "Done";
        else:
            print(".....Change the parameters and try again.")

    def predict(self, predict_input):
        h = None
        m = np.size(predict_input,0)
        for t in self.__theta_set:
            if (h is None):
                h = self.sigmoid(predict_input.dot(t.T))
            else:
                h = np.column_stack((np.ones((m,1)),h))
                h = self.sigmoid(h.dot(t.T))
        
        label = np.zeros((m,1)) -1
        for i in range(m):
            label[i] = self.__labels[np.nanargmax(h[i,:])]
        return label

    def getAccuracy(self, inputs, outputs):
        acc = 0.0
        if (self.__checkInput()):
            m = np.size(inputs,0)
            h=None
            for t in self.__theta_set:
                if (h is None):
                    h = self.sigmoid(inputs.dot(t.T))
                else:
                    h = np.column_stack((np.ones((m,1)),h))
                    h = self.sigmoid(h.dot(t.T))
                    
            check = (np.argmax(h,1) == np.argmax(outputs,1))
            acc = np.sum(check)/np.size(check,0)
        else:
            print("Cannot to calculate the accuracy.")
        return acc
    
    
    def printInfo(self):
        print(".....Inputs size:" + str(self.__inputs.shape))
        print(".....Outputs size:" + str(self.__outputs.shape))
        print(".....Labels:" + str(self.__labels))
        print(".....Number of layers:" + str(np.size(self.__theta_set, 0)))
        print(".....Theta Set:\n" + str.join("\n", [str(t.shape) for t in self.__theta_set]))
        print(".....Regularization parameter:" + str(self.__regular))
        print(".....Iterations:" + str(self.__iterations))
        if (self.trainingState == "Done"):
            print(".....Training Result:\n" + 
                  str(self.ResultMsg) +
                  "\nAccuracy: " + str(self.getAccuracy(self.__inputs, self.__outputs))
                  )
        else:
            print(".....Training State:" + self.trainingState)

    def __checkInput(self):
        flag = False
        if (self.__inputs.size < 4 or self.__inputs.ndim != 2):
            # Error 1001
            self.__reportError(1001)
        elif (self.__outputs.ndim != 2 or np.size(self.__outputs, 1) != self.__num__labels or np.size(self.__outputs, 0) != np.size(self.__inputs, 0)):
            # Error 1002
            self.__reportError(1002)
        elif (self.__num__labels != len(self.__labels) or len(self.__labels) < 3 or len(self.__labels) != len(list(set(self.__labels)))):
            # Error 1003
            self.__reportError(1003)
        elif (len(self.__theta_set) == 0 or np.size(self.__theta_set[0], 1) != np.size(self.__inputs, 1) or bool(sum([
                    np.size(self.__theta_set[t], 0) != (np.size(self.__theta_set[t + 1], 1) - 1)
                    for t in range(len(self.__theta_set) - 1)
                    ])) or np.size(self.__theta_set[-1], 0) != np.size(self.__outputs, 1)):
            # Error 1004
            self.__reportError(1004)
        elif (self.__regular is None or self.__iterations < 3):
            # Error 1005
            self.__reportError(1005)
        else:
            flag = True
        return flag

    def __reportError(self, errorCode, msg = ""):
        errors = {
            1001: "Inputs should be a non-empty 2-D array.",
            1002: "Outputs should be a 2-D array with len(Y) = label_size, \
                and len(X)=len(Inputs), and all values are 0 or 1 \
                (only one item=1).",
            1003: "Values in the Labels array should be unique and \
                Len(Labels)>2 ",
            1004: "For the first theta size(thata(0),1)=size(inputs,1), \
                size(thata(i),0) = size(thata(i+1),1)+1, \
                for the last one size(theta(end),0)=Len(labels)",
            1005: "Regularization should not be empty and \
                training iterations should larger than 2."
        }
        try:
            print(".....%d: %s \n\t %s \n" % (errorCode, errors[errorCode], msg))
        except KeyError as e:
            print(".....Error code is undefined.")


# Main
if __name__ == '__main__':
    # Initialize training set inputs and outputs
    matfn = "D:\\MachineLearning\\machine-learning-ex4\\ex4\\ex4data1.mat"
    data = sio.loadmat(matfn)
    inputs = data["X"]
    y = data["y"]
    m = np.size(inputs, 0)
    inputs = np.column_stack((np.ones((m, 1)), inputs))
    labels = list(set(y.flatten()))
    labels_size = len(labels)
    outputs = np.zeros((np.size(inputs, 0), labels_size))

    for i in range(0, m):
        outputs[i, labels.index(y[i, 0])] = 1

    # Initialize theta set (2 layers)
    # layer1_size = np.size(inputs, 1)
    # layer2_size = 25
    # theta2 = np.zeros((labels_size, layer2_size + 1))
    # theta1 = np.zeros((layer2_size, layer1_size))
    # theta_set = [theta1, theta2]

    # Initialize theta set (3 layers)
    layer1_size = np.size(inputs, 1)
    layer2_size = 80
    layer3_size = 25
    theta3 = np.zeros((labels_size, layer3_size + 1))
    theta2 = np.zeros((layer3_size, layer2_size + 1))
    theta1 = np.zeros((layer2_size, layer1_size))
    theta_set = [theta1, theta2, theta3]

    # Set pramaters
    iterations = 100
    regular = 1
    
    # minimize_method = 'TNC' # Fastest
    minimize_method = 'L-BFGS-B' # Fastest
    minimize_disp = False

    # Train the data set
    t_start = datetime.datetime.now()
    nerualNetwork = NerualNetwork()
    nerualNetwork.train(inputs, outputs, labels_size, labels, theta_set, 
                        regular, iterations, minimize_method, minimize_disp)

    # Print result
    nerualNetwork.printInfo()
    t_end = datetime.datetime.now()
    print("Total seconds: %f" % (t_end -t_start).total_seconds())
    
    # Test result for 3 layers
    # Accuracy: 0.9826
    # Total seconds: 6.638665