3.0 Logistic Regression
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as sio
from scipy.optimize import minimize
data = sio.loadmat('ex3data1.mat')
raw_X = data['X']  # (5000, 400): 5000 flattened 20x20 grayscale digit images
raw_Y = data['y']  # (5000, 1): labels 1..10, where 10 stands for the digit 0
print(raw_Y)
def plot_100_image(X):
    # pick 100 distinct images at random and show them on a 10x10 grid
    sample_index = np.random.choice(len(X), 100, replace=False)
    images = X[sample_index, :]
    fig, ax = plt.subplots(ncols=10, nrows=10, figsize=(8, 8), sharex=True, sharey=True)
    for r in range(10):
        for c in range(10):
            # each row stores a 20x20 image column-major, hence the transpose
            ax[r, c].imshow(images[10 * r + c].reshape(20, 20).T, cmap='gray_r')
    plt.xticks([])
    plt.yticks([])
    plt.show()
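A quick way to verify the data loaded correctly is to call the helper on the raw pixel matrix (this call is an addition; the original script only defines the function):

plot_100_image(raw_X)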
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
def computeCost(theta, X, y, lamda):
    # regularized cross-entropy cost; lamda is the regularization strength
    # and the bias parameter theta[0] is not penalized
    first = np.multiply(y, np.log(sigmoid(X @ theta)))
    second = np.multiply((1 - y), np.log(1 - sigmoid(X @ theta)))
    reg = np.sum(np.power(theta[1:], 2)) * (lamda / (2 * len(X)))
    return -(np.sum(first + second) / len(X)) + reg
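For reference, computeCost implements the regularized cross-entropy cost, with the bias parameter left out of the penalty:

J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta(x^{(i)}) + (1-y^{(i)})\log\big(1-h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2

where h_\theta(x) = g(\theta^T x) is the sigmoid hypothesis and m = len(X).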
def gradient_reg(theta, X, y, lamda):
    # regularization term for every parameter except the bias
    reg = theta[1:] * (lamda / len(X))
    reg = np.insert(reg, 0, values=0, axis=0)  # prepend 0 so theta[0] is unpenalized
    first = (X.T @ (sigmoid(X @ theta) - y)) / len(X)
    return first + reg
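gradient_reg is the matching analytic gradient; inserting the zero in front of the regularization vector keeps \theta_0 unpenalized:

\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)x_j^{(i)} + \frac{\lambda}{m}\theta_j \qquad (j \geq 1)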
X = np.insert(raw_X, 0, values=1, axis=1)  # prepend a column of ones for the bias term
y = raw_Y.flatten()                        # (5000, 1) -> (5000,)
def one_vs_all(X, y, lamda, K):
    # train K binary classifiers, one per digit class (labels 1..10)
    n = X.shape[1]
    theta_all = np.zeros((K, n))
    for i in range(1, K + 1):
        theta_i = np.zeros(n)
        # y == i turns the multi-class labels into a binary target for class i
        res = minimize(fun=computeCost, x0=theta_i, args=(X, y == i, lamda),
                       method='TNC', jac=gradient_reg)
        theta_all[i - 1, :] = res.x
    return theta_all
lamda=1
K=10
theta_final=one_vs_all(X,y,lamda,K)
print(theta_final)
def predict(X, theta_final):
    h = sigmoid(X @ theta_final.T)   # (5000, 10): one probability per class
    h_argmax = np.argmax(h, axis=1)
    return h_argmax + 1              # shift from 0-based index back to labels 1..10
y_pred=predict(X,theta_final)
acc=np.mean(y_pred==y)
print(acc)
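The overall accuracy can mask weak classes, so a short per-class breakdown (an added sketch, not part of the original script) is worth printing as well:

for k in range(1, K + 1):
    mask = (y == k)
    print(k, np.mean(y_pred[mask] == k))  # accuracy restricted to class k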
3.1 Using a Neural Network
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as sio
from scipy.optimize import minimize
data=sio.loadmat('ex3data1.mat')
raw_X=data['X']
raw_Y=data['y']
X=np.insert(raw_X,0,values=1,axis=1)
y=raw_Y.flatten()
theta = sio.loadmat('ex3weights.mat')
theta1 = theta['Theta1']  # (25, 401): input layer -> hidden layer
theta2 = theta['Theta2']  # (10, 26):  hidden layer -> output layer
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
a1 = X                                   # (5000, 401)
z2 = a1 @ theta1.T                       # (5000, 25)
a2 = sigmoid(z2)
a2 = np.insert(a2, 0, values=1, axis=1)  # add the hidden-layer bias unit -> (5000, 26)
z3 = a2 @ theta2.T                       # (5000, 10)
a3 = sigmoid(z3)
y_pred = np.argmax(a3, axis=1) + 1       # back from 0-based index to labels 1..10
acc=np.mean(y_pred==y)
print(acc)
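Because every layer size is fixed by the loaded weights, a quick shape check (an added sketch) catches dimension mistakes before scoring:

print(a1.shape, theta1.shape)  # (5000, 401) (25, 401)
print(a2.shape, theta2.shape)  # (5000, 26)  (10, 26)
print(a3.shape)                # (5000, 10)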
4.0 Neural Network Backpropagation
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as sio
from scipy.optimize import minimize
data=sio.loadmat('ex4data1.mat')
raw_X=data['X']
raw_Y=data['y']
X=np.insert(raw_X,0,values=1,axis=1)
def one_hot_encoder(raw_y):
    # map label k (1..10) to a 10-dim vector with a 1 in position k-1
    result = []
    for i in raw_y:
        y_temp = np.zeros(10)
        y_temp[i - 1] = 1
        result.append(y_temp)
    return np.array(result)
y=one_hot_encoder(raw_Y)
theta=sio.loadmat('ex4weights.mat')
theta1,theta2=theta['Theta1'],theta['Theta2']
def serialize(a, b):
    return np.append(a.flatten(), b.flatten())
theta_serialize=serialize(theta1,theta2)
def deserialize(theta_serialize):
    theta1 = theta_serialize[:25 * 401].reshape(25, 401)  # input -> hidden weights
    theta2 = theta_serialize[25 * 401:].reshape(10, 26)   # hidden -> output weights
    return theta1, theta2
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
def feed_forward(theta_serialize, X):
    theta1, theta2 = deserialize(theta_serialize)
    a1 = X
    z2 = a1 @ theta1.T
    a2 = sigmoid(z2)
    a2 = np.insert(a2, 0, values=1, axis=1)
    z3 = a2 @ theta2.T
    h = sigmoid(z3)
    return a1, z2, a2, z3, h
def cost(theta_serialize, X, y):
    a1, z2, a2, z3, h = feed_forward(theta_serialize, X)
    J = -np.sum(y * np.log(h) + (1 - y) * np.log(1 - h)) / len(X)
    return J
def reg_cost(theta_serialize, X, y, lamda):
    # deserialize here so the penalty uses the current parameters,
    # not the theta1/theta2 globals loaded from ex4weights.mat
    theta1, theta2 = deserialize(theta_serialize)
    sum1 = np.sum(np.power(theta1[:, 1:], 2))
    sum2 = np.sum(np.power(theta2[:, 1:], 2))
    reg = (sum1 + sum2) * lamda / (2 * len(X))
    return reg + cost(theta_serialize, X, y)
def sigmoid_gradient(z):
    return sigmoid(z) * (1 - sigmoid(z))
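The identity used in sigmoid_gradient follows from the chain rule applied to g(z) = 1/(1+e^{-z}):

g'(z) = \frac{e^{-z}}{(1+e^{-z})^2} = g(z)\big(1 - g(z)\big)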
def gradient(theta_serialize, X, y):
    theta1, theta2 = deserialize(theta_serialize)
    a1, z2, a2, z3, h = feed_forward(theta_serialize, X)
    d3 = h - y                                      # output-layer error (5000, 10)
    d2 = d3 @ theta2[:, 1:] * sigmoid_gradient(z2)  # hidden-layer error, bias column dropped
    D2 = (d3.T @ a2) / len(X)
    D1 = (d2.T @ a1) / len(X)
    return serialize(D1, D2)
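In matrix form, gradient implements the standard backpropagation equations for this three-layer network (\circ is the elementwise product, and the bias column of \Theta_2 is skipped when propagating the error back):

\delta^{(3)} = h - y, \qquad \delta^{(2)} = \delta^{(3)}\,\Theta_2[:,1{:}] \circ g'(z^{(2)}), \qquad D^{(l)} = \frac{1}{m}\,(\delta^{(l+1)})^T a^{(l)}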
def reg_gradient(theta_serialize, X, y, lamda):
    D = gradient(theta_serialize, X, y)
    D1, D2 = deserialize(D)
    theta1, theta2 = deserialize(theta_serialize)
    # add the regularization term to every weight except the bias column
    D1[:, 1:] = D1[:, 1:] + theta1[:, 1:] * lamda / len(X)
    D2[:, 1:] = D2[:, 1:] + theta2[:, 1:] * lamda / len(X)
    return serialize(D1, D2)
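Before training it is worth validating the analytic gradient against a two-sided finite-difference estimate on a few random components. The helper below is an added sketch (gradient_checking is not part of the original exercise code) and is slow, so run it once on a handful of indices, e.g. gradient_checking(theta_serialize, X, y, 1) on the loaded weights:

def gradient_checking(theta_serialize, X, y, lamda, eps=1e-4, n_checks=5):
    analytic = reg_gradient(theta_serialize, X, y, lamda)
    for i in np.random.choice(len(theta_serialize), n_checks, replace=False):
        plus, minus = theta_serialize.copy(), theta_serialize.copy()
        plus[i] += eps
        minus[i] -= eps
        numeric = (reg_cost(plus, X, y, lamda) - reg_cost(minus, X, y, lamda)) / (2 * eps)
        print(i, numeric, analytic[i])  # the two values should agree to ~4 decimals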
lamda = 10
def nn_training(X, y):
    # random initialization breaks the symmetry between hidden units;
    # 10285 = 25*401 + 10*26 is the total number of parameters
    init_theta = np.random.uniform(-0.5, 0.5, 10285)
    res = minimize(fun=reg_cost, x0=init_theta, args=(X, y, lamda), method='TNC',
                   jac=reg_gradient, options={'maxiter': 300})
    return res
res = nn_training(X, y)
raw_Y = data['y'].reshape(5000,)  # integer labels for scoring (the one-hot y was used for training)
_, _, _, _, h = feed_forward(res.x, X)
y_pred = np.argmax(h, axis=1) + 1
acc = np.mean(y_pred == raw_Y)
print(acc)
def plot_hidden_layer(theta):
    # visualize the 25 hidden units: each row of theta1 (minus the bias weight)
    # is a 400-dim vector that can be rendered as a 20x20 image
    theta1, _ = deserialize(theta)
    hidden_layer = theta1[:, 1:]  # (25, 400)
    fig, ax = plt.subplots(ncols=5, nrows=5, figsize=(8, 8), sharex=True, sharey=True)
    for r in range(5):
        for c in range(5):
            ax[r, c].imshow(hidden_layer[5 * r + c].reshape(20, 20).T, cmap='gray_r')
    plt.xticks([])
    plt.yticks([])
    plt.show()
plot_hidden_layer(res.x)