机器学习作业4编程作业(python):Neural Networks Learning

最新推荐文章于 2023-02-02 03:23:28 发布

clnnnnn

最新推荐文章于 2023-02-02 03:23:28 发布

阅读量236

点赞数

分类专栏：吴恩达机器学习课作业

本文链接：https://blog.csdn.net/weixin_45033788/article/details/109491496

版权

吴恩达机器学习课作业专栏收录该内容

5 篇文章 0 订阅

订阅专栏

英文文档及图片均来自原版作业 PDF 的截图，网址：https://www.coursera.org/learn/machine-learning/programming/8f3qT/linear-regression

Neural Networks Learning

在这里插入图片描述
本次书写的代码是3层神经网络，即一层隐含层

1.前馈
先将上节课的前馈网络写成函数：
https://blog.csdn.net/Cowry5/article/details/80399350
从大佬的博客学习了：theta1和theta2要合并之后才能传给minimize

def serialize(a, b):
    """Flatten two weight matrices and join them into one 1-D vector.

    Approach adapted from
    https://blog.csdn.net/Cowry5/article/details/80399350

    Args:
        a: first array; its flattened entries come first.
        b: second array; its flattened entries follow.

    Returns:
        A new 1-D array holding ``a.flatten()`` followed by ``b.flatten()``.
    """
    flat_a = a.flatten()
    flat_b = b.flatten()
    return np.concatenate((flat_a, flat_b))

def deserialize(seq, input_size=400, hidden_size=25, num_labels=10):
    """Split a flat parameter vector back into the two weight matrices.

    Approach adapted from
    https://blog.csdn.net/Cowry5/article/details/80399350

    The layout mirrors ``serialize(theta1, theta2)`` where ``theta1`` has
    shape ``(input_size + 1, hidden_size)`` and ``theta2`` has shape
    ``(hidden_size + 1, num_labels)``; the ``+ 1`` rows are the bias weights.
    The original reshaped to (25, 401) / (10, 26), which does not match the
    ``X @ theta1`` product used by the forward pass.

    Args:
        seq: 1-D array of all weights, ``theta1`` first, then ``theta2``.
        input_size: number of input features without the bias column.
        hidden_size: number of hidden units without the bias unit.
        num_labels: number of output units.

    Returns:
        Tuple ``(theta1, theta2)``. NumPy slicing/reshape may return views
        that share memory with ``seq``, so do not modify them in place.

    Raises:
        ValueError: raised by ``reshape`` when ``seq`` has the wrong length.
    """
    split = (input_size + 1) * hidden_size
    theta1 = seq[:split].reshape(input_size + 1, hidden_size)
    theta2 = seq[split:].reshape(hidden_size + 1, num_labels)
    return theta1, theta2

def feedforwardnn(theta, X):
    """Run one forward pass through the three-layer network.

    Args:
        theta: flat parameter vector, unpacked with ``deserialize``.
        X: input matrix that already contains the leading bias column.

    Returns:
        Tuple ``(a1, z2, a2, z3, a3)``: the input activations, the hidden
        layer pre-activations, the hidden activations with a bias column
        prepended, the output pre-activations and the output activations.
    """
    # Fix: the original passed the undefined name ``thata`` (NameError).
    theta1, theta2 = deserialize(theta)
    a1 = X  # input-layer activations
    z2 = X @ theta1  # hidden-layer input
    a2 = sigmoid(z2)  # hidden-layer output
    a2 = np.insert(a2, 0, values=1, axis=1)  # prepend the bias column
    z3 = a2 @ theta2  # output-layer input
    a3 = sigmoid(z3)  # output-layer output
    return a1, z2, a2, z3, a3

2.修改输出y

y = data['y'] # class labels, y.shape=(5000,1); note: the label "10" is kept as-is

def y_init(y):
    """One-hot encode a column of 1-based class labels.

    The number of classes K is the number of distinct values in ``y``.
    Label ``k`` is mapped to column ``k - 1``, so the largest label (e.g.
    "10") lands in the last column. The original indexed with the raw label,
    which runs past the end of the row for the largest label.

    Args:
        y: array of integer labels, e.g. shape ``(m, 1)`` or ``(m,)``.

    Returns:
        Float array of shape ``(m, K)`` with exactly one 1 per row.
    """
    labels = np.asarray(y).reshape(-1).astype(np.int64)
    num_classes = np.unique(labels).shape[0]
    one_hot = np.zeros((labels.shape[0], num_classes))
    one_hot[np.arange(labels.shape[0]), labels - 1] = 1
    return one_hot

即改成这种形式：在这里插入图片描述

3.代价函数：
加上正则项：
在这里插入图片描述
hθ(x) 是输出层各节点的输出：对单个样本是一个 K 维向量（K 个输出节点），对全部 m 个样本则组成 m×K 的矩阵。
正则项不包含各层偏置单元（常数 1）所对应的权重。

def cost(theta, X, y, lam):
    """Regularised cross-entropy cost of the three-layer network.

    Args:
        theta: flat parameter vector (unpacked with ``deserialize``).
        X: input matrix including the bias column, one sample per row.
        y: label matrix with the same shape as the network output
            (presumably the one-hot matrix built by ``y_init``).
        lam: regularisation strength.

    Returns:
        The scalar cost: mean cross-entropy plus ``lam / (2 m)`` times the
        sum of squared non-bias weights.
    """
    a3 = feedforwardnn(theta, X)[-1]
    # Fix: m is the number of samples; the original used X.shape[0] - 1.
    m = X.shape[0]
    eps = 1e-5  # keeps log() away from 0 when the output saturates
    cross_entropy = np.sum(-y * np.log(a3 + eps) - (1 - y) * np.log(1 - a3 + eps))
    theta1, theta2 = deserialize(theta)
    # Fix: skip the bias rows by slicing. The original zeroed row 0 in place,
    # which wrote through the reshape view into the caller's ``theta``.
    penalty = np.sum(np.square(theta1[1:, :])) + np.sum(np.square(theta2[1:, :]))
    return cross_entropy / m + lam * penalty / (2 * m)

初值验证：
在这里插入图片描述

# Sanity check of the regularised cost with lam=1; presumably theta holds the
# provided pre-trained weights - TODO confirm.
print(cost(theta,X,y,1))
0.38372982089246455

4.梯度函数
重点

注意矩阵的维度

def gra_g(z):
    """Return the sigmoid gradient g(z) * (1 - g(z)), evaluated element-wise."""
    activation = sigmoid(z)
    return activation * (1 - activation)

def gradient(theta, X, y, lam):
    """Back-propagated gradient of the regularised cost.

    Args:
        theta: flat parameter vector (unpacked with ``deserialize``).
        X: input matrix including the bias column, one sample per row.
        y: label matrix with the same shape as the network output.
        lam: regularisation strength.

    Returns:
        Flat gradient vector packed with ``serialize`` in the same order as
        ``theta`` (first-layer weights, then second-layer weights).
    """
    theta1, theta2 = deserialize(theta)
    a1, z2, a2, z3, a3 = feedforwardnn(theta, X)
    # Fix: m is the number of samples; the original used X.shape[0] - 1.
    m = X.shape[0]

    del3 = a3 - y  # output-layer error, one row per sample
    # Drop the bias row of theta2: the bias unit has no incoming error.
    del2 = (del3 @ theta2[1:, :].T) * gra_g(z2)
    D2 = a2.T @ del3  # same shape as theta2
    D1 = a1.T @ del2  # same shape as theta1

    # Bias weights (row 0) are not regularised. Work on copies: the original
    # zeroed row 0 in place, which wrote through the reshape view into the
    # caller's ``theta``.
    reg1 = theta1.copy()
    reg1[0, :] = 0
    reg2 = theta2.copy()
    reg2[0, :] = 0

    DD1 = D1 / m + reg1 / m * lam
    DD2 = D2 / m + reg2 / m * lam
    return serialize(DD1, DD2)

梯度算出来之后还需要与理论值进行比对（梯度检验），这部分先空着，后面再回来补 :)
在这里插入图片描述

5.随机化初值
在这里插入图片描述

def theta_init(a, b):
    """Draw an ``a`` x ``b`` matrix of initial weights uniformly from [-0.12, 0.12)."""
    bound = 0.12
    shape = (a, b)
    return np.random.uniform(low=-bound, high=bound, size=shape)

6.使用minimize函数

import random
from scipy.optimize import minimize
   
def NN(theta, X, y, lam):
    """Fit the network weights by minimising ``cost`` with SciPy's TNC solver.

    Args:
        theta: flat initial parameter vector.
        X: input matrix including the bias column.
        y: label matrix passed through to ``cost`` and ``gradient``.
        lam: regularisation strength.

    Returns:
        The result object returned by ``scipy.optimize.minimize``; the
        fitted parameters are in its ``x`` attribute.
    """
    # NOTE(review): check that 'maxiter' is still an accepted TNC option in
    # the installed SciPy version.
    solver_options = {'maxiter': 400}
    return minimize(
        fun=cost,
        x0=theta,
        args=(X, y, lam),
        method='TNC',
        jac=gradient,
        options=solver_options,
    )
    
# Random initial weights for both layers, packed into one flat vector.
thetainit = serialize(theta_init(401,25), theta_init(26,10))
# NOTE(review): `lam` is not defined in this snippet; the sample output below
# suggests it is read from user input - confirm against the full script.
R = NN(thetainit, X, y, lam)
print(R)

please input the lambda:1
     fun: 0.8266844881847328
     jac: array([-0.00322697,  0.00046125,  0.00155233, ..., -0.00089185,
        0.0005899 ,  0.00059701])
 message: 'Converged (|f_n-f_(n-1)| ~= 0)'
    nfev: 358
     nit: 17
  status: 1
 success: True
       x: array([ 0.24906532, -0.68048419, -0.47365931, ..., -1.83687721,
       -2.32626506, -1.04057674])

7.预测结果

# Train the network and keep the optimised flat parameter vector.
R = NN(thetainit, X, y, lam)
theta_final = R.x
def Prediction(theta_final, sample_x):
    """Predict a 1-based class label for every row of ``sample_x``.

    Args:
        theta_final: flat trained parameter vector.
        sample_x: 2-D ndarray of samples without the bias column.

    Returns:
        1-D array holding, per sample, the index of the largest output
        activation plus one.
    """
    with_bias = np.insert(sample_x, 0, values=1, axis=1)  # prepend bias column
    output = feedforwardnn(theta_final, with_bias)[-1]
    return np.argmax(output, axis=1) + 1

def Judgement(y1, predic):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y1: true labels as a column, shape ``(m, 1)``.
        predic: predicted labels, reshaped here to a column of the same length.

    Returns:
        Number of matching entries divided by ``y1.shape[0]``.
    """
    column = predic.reshape((predic.shape[0], 1))
    difference = y1 - column
    # A zero difference means the prediction matched the label.
    hits = (difference == 0).sum()
    return hits / y1.shape[0]

整合代码：

'''
三层神经网络
'''

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.io import loadmat
import random
import scipy.special
from scipy.optimize import minimize


def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated with SciPy's ``expit``."""
    logistic = scipy.special.expit
    return logistic(x)

def y_init(y):
    """One-hot encode a column of 1-based class labels.

    The number of classes K is the number of distinct values in ``y``.
    Label ``k`` is mapped to column ``k - 1``, so the label "10" ends up in
    the last column.

    Args:
        y: array of integer labels, e.g. shape ``(m, 1)`` or ``(m,)``.

    Returns:
        Float array of shape ``(m, K)`` with exactly one 1 per row.
    """
    labels = np.asarray(y).reshape(-1).astype(np.int64)
    # Computed once; the original recomputed np.unique(y) for every sample.
    num_classes = np.unique(labels).shape[0]
    one_hot = np.zeros((labels.shape[0], num_classes))
    one_hot[np.arange(labels.shape[0]), labels - 1] = 1
    return one_hot

def serialize(a, b):
    """Pack two arrays into a single flat vector (``a`` first, then ``b``).

    Approach adapted from
    https://blog.csdn.net/Cowry5/article/details/80399350
    """
    pieces = [a.flatten(), b.flatten()]
    return np.concatenate(pieces)

def deserialize(seq, input_size=400, hidden_size=25, num_labels=10):
    """Split a flat parameter vector back into the two weight matrices.

    Approach adapted from
    https://blog.csdn.net/Cowry5/article/details/80399350

    The defaults reproduce the original hard-coded layout: ``theta1`` of
    shape (401, 25) followed by ``theta2`` of shape (26, 10). The extra row
    in each matrix holds the bias weights.

    Args:
        seq: 1-D array of all weights, ``theta1`` first, then ``theta2``.
        input_size: number of input features without the bias column.
        hidden_size: number of hidden units without the bias unit.
        num_labels: number of output units.

    Returns:
        Tuple ``(theta1, theta2)``. NumPy slicing/reshape may return views
        that share memory with ``seq``, so do not modify them in place.

    Raises:
        ValueError: raised by ``reshape`` when ``seq`` has the wrong length.
    """
    split = (input_size + 1) * hidden_size
    theta1 = seq[:split].reshape(input_size + 1, hidden_size)
    theta2 = seq[split:].reshape(hidden_size + 1, num_labels)
    return theta1, theta2

def theta_init(a, b):
    """Return an ``a`` x ``b`` weight matrix sampled uniformly from [-0.12, 0.12)."""
    epsilon = 0.12
    return np.random.uniform(low=-epsilon, high=epsilon, size=(a, b))

def feedforwardnn(theta, X):
    """Run one forward pass through the three-layer network.

    Args:
        theta: flat parameter vector, unpacked with ``deserialize``.
        X: input matrix that already contains the leading bias column.

    Returns:
        Tuple ``(a1, z2, a2, z3, a3)``: the input activations, the hidden
        layer pre-activations, the hidden activations with a bias column
        prepended, the output pre-activations and the output activations.
    """
    w_hidden, w_output = deserialize(theta)
    hidden_in = X @ w_hidden
    # Prepend the constant bias unit to the hidden activations.
    hidden_out = np.insert(sigmoid(hidden_in), 0, values=1, axis=1)
    output_in = hidden_out @ w_output
    return X, hidden_in, hidden_out, output_in, sigmoid(output_in)

def cost(theta, X, y, lam):
    """Regularised cross-entropy cost of the three-layer network.

    Args:
        theta: flat parameter vector (unpacked with ``deserialize``).
        X: input matrix including the bias column, one sample per row.
        y: label matrix with the same shape as the network output
            (presumably the one-hot matrix built by ``y_init``).
        lam: regularisation strength.

    Returns:
        The scalar cost: mean cross-entropy plus ``lam / (2 m)`` times the
        sum of squared non-bias weights.
    """
    a3 = feedforwardnn(theta, X)[-1]
    # Fix: m is the number of samples; the original used X.shape[0] - 1.
    m = X.shape[0]
    eps = 1e-5  # keeps log() away from 0 when the output saturates
    cross_entropy = np.sum(-y * np.log(a3 + eps) - (1 - y) * np.log(1 - a3 + eps))
    theta1, theta2 = deserialize(theta)
    # Fix: skip the bias rows by slicing. The original zeroed row 0 in place,
    # which wrote through the reshape view into the caller's ``theta``.
    penalty = np.sum(np.square(theta1[1:, :])) + np.sum(np.square(theta2[1:, :]))
    return cross_entropy / m + lam * penalty / (2 * m)

def gra_g(z):
    """Element-wise derivative of the sigmoid, g(z) * (1 - g(z))."""
    g = sigmoid(z)
    return g * (1 - g)

def gradient(theta, X, y, lam):
    """Back-propagated gradient of the regularised cost.

    Args:
        theta: flat parameter vector (unpacked with ``deserialize``).
        X: input matrix including the bias column, one sample per row.
        y: label matrix with the same shape as the network output.
        lam: regularisation strength.

    Returns:
        Flat gradient vector packed with ``serialize`` in the same order as
        ``theta`` (first-layer weights, then second-layer weights).
    """
    theta1, theta2 = deserialize(theta)
    a1, z2, a2, z3, a3 = feedforwardnn(theta, X)
    # Fix: m is the number of samples; the original used X.shape[0] - 1.
    m = X.shape[0]

    del3 = a3 - y  # output-layer error, one row per sample
    # Drop the bias row of theta2: the bias unit has no incoming error.
    del2 = (del3 @ theta2[1:, :].T) * gra_g(z2)
    D2 = a2.T @ del3  # same shape as theta2
    D1 = a1.T @ del2  # same shape as theta1

    # Bias weights (row 0) are not regularised. Work on copies: the original
    # zeroed row 0 in place, which wrote through the reshape view into the
    # caller's ``theta``.
    reg1 = theta1.copy()
    reg1[0, :] = 0
    reg2 = theta2.copy()
    reg2[0, :] = 0

    DD1 = D1 / m + reg1 / m * lam
    DD2 = D2 / m + reg2 / m * lam
    return serialize(DD1, DD2)

def NN(theta, X, y, lam):
    """Fit the network weights by minimising ``cost`` with SciPy's TNC solver.

    Args:
        theta: flat initial parameter vector.
        X: input matrix including the bias column.
        y: label matrix passed through to ``cost`` and ``gradient``.
        lam: regularisation strength.

    Returns:
        The result object returned by ``scipy.optimize.minimize``; the
        fitted parameters are in its ``x`` attribute.
    """
    # NOTE(review): check that 'maxiter' is still an accepted TNC option in
    # the installed SciPy version.
    extra_args = (X, y, lam)
    solver_options = {'maxiter': 400}
    outcome = minimize(fun=cost, x0=theta, args=extra_args, method='TNC',
                       jac=gradient, options=solver_options)
    return outcome


def Prediction(theta_final, sample_x):
    """Predict a 1-based class label for every row of ``sample_x``.

    Args:
        theta_final: flat trained parameter vector.
        sample_x: 2-D ndarray of samples without the bias column.

    Returns:
        1-D array holding, per sample, the index of the largest output
        activation plus one.
    """
    inputs = np.insert(sample_x, 0, values=1, axis=1)  # prepend bias column
    *_, scores = feedforwardnn(theta_final, inputs)
    best = np.argmax(scores, axis=1)
    return best + 1


def Judgement(y1, predic):
    """Compute the accuracy of ``predic`` against the true labels ``y1``.

    Args:
        y1: true labels as a column, shape ``(m, 1)``.
        predic: predicted labels, reshaped here to a column of the same length.

    Returns:
        Number of matching entries divided by ``y1.shape[0]``.
    """
    n_pred = predic.shape[0]
    residual = y1 - predic.reshape(n_pred, 1)
    # Entries equal to zero are the correctly classified samples.
    n_correct = np.sum(residual == 0)
    total = y1.shape[0]
    return n_correct / total


def PrintRandom100ImageWithPredict(sample_x, predic, percent):
    """Show up to 100 randomly chosen images labelled with their prediction.

    Args:
        sample_x: 2-D array with one flattened 20x20 image (400 values) per row.
        predic: predicted label per row of ``sample_x``.
        percent: accuracy as a fraction; shown in the figure title as a
            percentage.

    The images are drawn on a 10x10 grid. The original always sampled 100
    indices from ``range(5000)``, which fails for any other data-set size;
    the number of rows is now taken from ``sample_x``.
    """
    total = sample_x.shape[0]
    count = min(100, total)  # the grid has room for at most 100 images
    fig = plt.figure(figsize=(6, 6))
    fig.suptitle('Prediction with the precision rate :'+ str(percent*100) +'%')  # figure title
    chosen = random.sample(range(total), count)  # random row indices of sample_x
    font = {'family': 'Times New Roman', 'weight': 'normal', 'size': 8}  # axis label font
    for slot, row in enumerate(chosen, start=1):
        fig.add_subplot(10, 10, slot)
        img = sample_x[row, :].reshape((20, 20))
        plt.imshow(img, cmap=plt.cm.gray)
        plt.xticks([])
        plt.yticks([])
        plt.xlabel('this is:' + str(int(predic[row])), font)
    plt.tight_layout()  # adjust the spacing between subplots
    plt.show()




# Driver script: load the data, train the network, then report and plot the
# predictions.
path = r'D:\Ninachen\wg_machinelearning\machine-learning-ex4\ex4\ex4data1.mat'
data = loadmat(path) # dict
X1 = data['X'] # ndarray, training set
X = np.insert(X1, 0, values=1, axis=1)  # prepend the bias column
y1 = data['y'] # class labels, y1.shape=(5000,1)
# y[np.where(y==10)] = 0 # change 10 to 0
y =  y_init(y1) # convert the labels to one-hot vectors

# Optional: load the provided pre-trained weights instead of random ones.
# path2 = r'D:\Ninachen\wg_machinelearning\machine-learning-ex4\ex4\ex4weights.mat'
# data2 = loadmat(path2) # dict
# theta1 = data2['Theta1'].T
# theta2 = data2['Theta2'].T
# theta = serialize(theta1, theta2)
thetainit = serialize(theta_init(401,25), theta_init(26,10))

# Regularisation strength is read interactively from the user.
lam =float(input('please input the lambda:'))
R = NN(thetainit, X, y, lam)
theta_final = R.x

# Evaluate on the training images themselves (without the bias column).
sample_x = X1

predict = Prediction(theta_final, sample_x)
percent = Judgement(y1, predict) # accuracy

predict[np.where(predict==10)] = 0 # display the label 10 as 0
PrintRandom100ImageWithPredict(sample_x, predict, percent)

结果：
λ=1时：
在这里插入图片描述

初值不同，准确率略有不同。

clnnnnn

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
机器学习作业4编程作业(python):Neural Networks Learning

英文文档图片均来自原档作业pdf截图网址https://www.coursera.org/learn/machine-learning/programming/8f3qT/linear-regression*Neural Networks Learning
复制链接

扫一扫