逻辑回归识别0-9数字-吴恩达-机器学习

最新推荐文章于 2024-05-11 21:41:26 发布

なつの目

最新推荐文章于 2024-05-11 21:41:26 发布

阅读量904

点赞数 1

分类专栏：吴恩达-机器学习文章标签：机器学习逻辑回归 python

本文链接：https://blog.csdn.net/qq_43494330/article/details/121441475

版权

吴恩达-机器学习专栏收录该内容

5 篇文章 0 订阅

订阅专栏

#导入模块
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import loadmat
import matplotlib
import scipy.optimize as opt
from sklearn.metrics import classification_report#这个包是评价报告

第一步数据可视化

#导入数据
data =loadmat('ex3data1.mat')
data.keys()
dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])
X =data['X']
y =data['y']

#输出一个数字
ix =np.random.randint(0,5000)
a =X[ix,:]
#输入一个数字图像
b =a.reshape(20,20)
fig,ax =plt.subplots(figsize =(8,7))
ax.matshow(b)
ax.matshow(b,cmap =matplotlib.cm.binary)

输出100个手写体数字

#数字是旋转的需要对其进行转置
X1 =np.array([im.reshape(20,20).T for im in X])
X_T =np.array([im.reshape(400) for im in X1])

#输出100个数字图像
#随机选取X中的100个数字图像
image_index =np.random.choice(X_T.shape[0],100)
image_100 =X_T[image_index,:]
#fig,ax =plt.subplots(figsize =(8,7))

fig1,ax =plt.subplots(nrows =10,ncols =10,figsize =(10,10),sharey=True, sharex=True,)

for i in range(10):
    for j in range(10):
        #不要下标
        plt.xticks(np.array([])) 
        plt.yticks(np.array([]))
        ax[i,j].matshow(image_100[i*10+j,:].reshape(20,20),cmap =matplotlib.cm.binary)

第二步：向量化逻辑回归，损失函数和迭代梯度（含正则化）

#训练图像特征X_T,训练标签y1
y1 =y.reshape(y.shape[0])
print(X_T.shape,y1.shape)
print(type(X_T),type(y1))

#特征数据第一列插入1值
trainX =np.insert(X_T,0,values =np.ones(X_T.shape[0]),axis =1)

ln =[]
#对数据y1进行处理，例如只看数字10为标签，则10的位置为1，其他地方为0
#因此，对10，1，2.。。数字进行分类标签处理，每个数字映射为0,1标签（5000*1）
for num in range(1,11):
    ln.append((y1==num).astype(int))
ln =[ln[-1]]+ln[:-1]
trainy =np.array(ln)

#训练图像特征dataX,训练标签datay
print(trainX.shape,trainy.shape)
print(type(trainX),type(trainy))

(5000, 401) (10, 5000)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>

定义损失函数与梯度函数

#定义损失函数
def cost(theta,X,y):
    '''
    输入：theta 参数，X 特征数据 ，y标签数据
    输出：cost 损失值
    '''
    #假设函数
    h =X@theta
    g =1/(1+np.exp(-h))
    #损失值
    first =-y*np.log(g)
    second =(1-y)*(np.log(1-g))
    return np.mean(first-second)

#定义梯度迭代函数
def gradient(theta,X,y):
    '''
    输入：theta 参数，X 特征数据 ，y标签数据
    输出：迭代后的参数θ值 
    '''
    #假设函数
    h =X@theta
    g =1/(1+np.exp(-h))
    
    temp =g-y
    return (X.T@temp)/len(X)

#定义正则化损失函数与梯度函数
#定义正则化损失函数
def cost_regularize(theta,X,y,lamda=1):
    '''
    输入：theta 参数28*1，X 特征数据 118*28，y标签数据118*1,l 正则化参数lamda，默认为1
    输出：cost 损失值
    '''
    third =np.sum(np.power(theta,2))*lamda/(len(X)*2)
    return cost(theta,X,y)+third

#定义正则化梯度迭代函数
def gradient_regularize(theta,X,y,lamda=1):
    '''
    输入：theta 参数28*1，X 特征数据 118*28，y标签数据118*1,l 正则化参数lamda，默认为1
    输出：迭代后的参数θ值 28*1
    注：theta0的梯度不用正则化
    '''
    #正则化
    theta_reg =theta*lamda/(len(X))
    theta_reg[0] =0
    return gradient(theta,X,y)+theta_reg

第三步：分类和预测

先做一分类，识别数字0

#先做一分类，识别数字0
trainy0 =trainy[0]
trainy0.shape
(5000,)


#定义正则化训练模型
def logistic(X,y,lamda =1):
    #初始化参数theta
    theta =np.zeros(trainX.shape[1])
    #初始损失值
    print('初始损失值：',cost(theta,X,y))
    #训练模型
    result =opt.minimize(fun =cost_regularize,x0 =theta, args=(X,y,lamda),method ='TNC',jac =gradient_regularize)
    #终止损失值
    print('最终损失值：',result.fun)
    return result.x

theta_final=logistic(trainX,trainy0)

#模型评价
h =trainX@theta_final
g =1/(1+np.exp(-h))
predict =[]
for i in g:
    if i>=0.5:
        predict.append(1)
    else:
        predict.append(0)
print(classification_report(trainy0,predict))

0-9多分类训练

theta_k =[]
for i in range(10):
    theta_k.append(logistic(trainX,trainy[i]))
theta_k =np.array(theta_k)
theta_k.shape

#将训练好的10个数字的theta参数代入训练特征中，计算预测结果，并对其进行评价
#定义sigmoid函数
def sigmoid(x):
    f =1/(1+np.exp(-x))
    return f
p_k = sigmoid(trainX @theta_k.T)
p_k.shape,p_k

#多分类，不能直接利用数值大于0.5或小于0.5来判断0-1分类
#p_k 5000*10 ,要得到预测的结果是5000*1，
#也就是说，每一行中数值最大的那个数对应的数字预测结果可能性最大
#所以每一行返回数值最大的对应的下标，将5000*10映射为5000*1,便是结果
predict_y =np.argmax(p_k,axis =1)
real_y =y.copy()
real_y[real_y==10] =0
print(classification_report(real_y,predict_y))

なつの目

关注

1
点赞
踩
3

收藏

觉得还不错? 一键收藏
打赏
0
评论
逻辑回归识别0-9数字-吴恩达-机器学习

#导入模块import matplotlib.pyplot as pltimport numpy as npfrom scipy.io import loadmatimport matplotlibimport scipy.optimize as optfrom sklearn.metrics import classification_report#这个包是评价报告第一步数据可视化#导入数据data =loadmat('ex3data1.mat')data.keys()di.
复制链接

扫一扫