Solving Logistic Regression with Gradient Descent

Importing the data

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

path = 'data' + os.sep + 'LogiReg_data.txt'
pdData = pd.read_csv(path, header=None, names=['Exam 1', 'Exam 2', 'Admitted'])
print(pdData.head())
print(pdData.shape)
positive=pdData[pdData['Admitted']==1]
negative=pdData[pdData['Admitted']==0]
fig,ax=plt.subplots(figsize=(10,5))
ax.scatter(positive['Exam 1'],positive['Exam 2'],s=30,c='b',
           marker='o',label='Admitted')
ax.scatter(negative['Exam 1'],negative['Exam 2'],s=30,c='r',
           marker='x',label='Not Admitted')
ax.legend()
ax.set_xlabel("Exam 1 Score")
ax.set_ylabel('Exam 2 Score')
plt.show()

Logistic regression:

Goal: build a classifier.

Set a threshold and use it to decide whether an applicant is admitted.
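Concretely, with the usual 0.5 cutoff (the same one used by the predict function at the end), the decision rule is

$$\hat{y} = \begin{cases} 1, & h_\theta(x) \ge 0.5 \\ 0, & h_\theta(x) < 0.5 \end{cases}$$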

Modules to implement:

sigmoid: maps scores to probabilities

model: returns the predicted values

cost: computes the loss for the current parameters

gradient: computes the update direction from the current parameters

descent: performs the parameter updates

accuracy: computes the classification accuracy

The sigmoid function:
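The sigmoid squashes any real number into the interval (0, 1), so its output can be read as a probability:

$$g(z) = \frac{1}{1 + e^{-z}}$$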

# The sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Plot the curve
nums = np.arange(-10, 10, step=1)
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(nums, sigmoid(nums), 'r')
plt.show()

The prediction function:
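The model combines a linear score with the sigmoid, which is the standard logistic regression hypothesis:

$$h_\theta(x) = g(\theta^{T}x) = \frac{1}{1 + e^{-\theta^{T}x}}$$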

# Prediction function
def model(X, theta):
    return sigmoid(np.dot(X, theta.T))

pdData.insert(0, 'ones', 1)  # add a column of ones for the bias term
orig_data = pdData.values    # convert the DataFrame to a NumPy array (as_matrix() was removed from pandas)
cols = orig_data.shape[1]
# X, Y: the current data
X = orig_data[:, 0:cols-1]
Y = orig_data[:, cols-1:cols]
# theta: the current parameters, a 1x3 row vector initialized with zeros
theta = np.zeros([1, 3])
print(X[:5])
print(Y[:5])
print(theta)

The cost function
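The cost is the average negative log-likelihood over the m training samples:

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h_\theta(x^{(i)}) + \left(1 - y^{(i)}\right)\log\left(1 - h_\theta(x^{(i)})\right)\right]$$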


# Cost function
def cost(X, y, theta):
    left = np.multiply(-y, np.log(model(X, theta)))
    right = np.multiply(1 - y, np.log(1 - model(X, theta)))
    return np.sum(left - right) / len(X)

print(cost(X, Y, theta))

Computing the gradient
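Each component of the gradient averages the prediction error weighted by the corresponding feature:

$$\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}$$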

# Compute the gradient
def gradient(X, y, theta):
    grad = np.zeros(theta.shape)
    error = (model(X, theta) - y).ravel()
    for j in range(len(theta.ravel())):  # one partial derivative per parameter
        term = np.multiply(error, X[:, j])
        grad[0, j] = np.sum(term) / len(X)
    return grad

Comparing gradient descent variants

# Compare three gradient descent variants via three stopping strategies
STOP_ITER = 0
STOP_COST = 1
STOP_GRAD = 2

def stopCriterion(type, value, threshold):
    # Three different stopping strategies
    if type == STOP_ITER:
        return value > threshold                        # stop after a fixed number of iterations
    elif type == STOP_COST:
        return abs(value[-1] - value[-2]) < threshold   # stop when the cost barely changes
    elif type == STOP_GRAD:
        return np.linalg.norm(value) < threshold        # stop when the gradient norm is small
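
# Quick sanity check of the three strategies (illustrative values, not from the original run):
assert stopCriterion(STOP_ITER, 5001, 5000)              # iteration count exceeded the threshold
assert stopCriterion(STOP_COST, [0.6300, 0.6299], 1e-3)  # last two costs differ by less than 1e-3
assert stopCriterion(STOP_GRAD, np.full(3, 0.01), 0.05)  # gradient norm ~0.017 is below 0.05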
# Shuffle the data so each pass sees the samples in a new order
def shuffleData(data):
    np.random.shuffle(data)
    cols = data.shape[1]
    X = data[:, 0:cols-1]
    y = data[:, cols-1:]
    return X, y
import time

def descent(data, theta, batchSize, stopType, thresh, alpha):
    # Solve by gradient descent
    init_time = time.time()
    # Initialization
    i = 0  # iteration counter
    k = 0  # batch cursor
    X, y = shuffleData(data)
    grad = np.zeros(theta.shape)  # the computed gradient
    costs = [cost(X, y, theta)]   # loss history
    while True:
        grad = gradient(X[k:k+batchSize], y[k:k+batchSize], theta)
        k += batchSize
        if k >= n:  # n is the global sample count; reshuffle once an epoch is done
            k = 0
            X, y = shuffleData(data)
        theta = theta - alpha * grad  # parameter update
        costs.append(cost(X, y, theta))
        i += 1
        if stopType == STOP_ITER:
            value = i
        elif stopType == STOP_COST:
            value = costs
        elif stopType == STOP_GRAD:
            value = grad
        if stopCriterion(stopType, value, thresh):
            break
    return theta, i - 1, costs, grad, time.time() - init_time
def runExp(data, theta, batchSize, stopType, thresh, alpha):
    # import pdb; pdb.set_trace()
    theta, n_iter, costs, grad, dur = descent(data, theta, batchSize, stopType,
                                              thresh, alpha)  # core call
    name = "Original" if (data[:, 1] > 2).sum() > 1 else "Scaled"
    name += " data - learning rate: {} - ".format(alpha)
    if batchSize == n:
        strDescType = "Gradient"
    elif batchSize == 1:
        strDescType = "Stochastic"
    else:
        strDescType = "Mini-batch ({})".format(batchSize)
    name += strDescType + " descent - Stop: "
    if stopType == STOP_ITER:
        strStop = "{} iterations".format(thresh)
    elif stopType == STOP_COST:
        strStop = "costs change < {}".format(thresh)
    else:
        strStop = "gradient norm < {}".format(thresh)
    name += strStop
    print("***{}\nTheta: {} - Iter: {} - Last cost: {:03.2f} - Duration: {:03.2f}s".format(
        name, theta, n_iter, costs[-1], dur))
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(np.arange(len(costs)), costs, 'r')  # cost history
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost')
    ax.set_title(name.upper() + ' - Error vs. Iteration')
    plt.show()
    return theta
# n equals the number of samples (100), so batchSize == n means full-batch gradient descent
n = 100
runExp(orig_data, theta, n, STOP_ITER, thresh=5000, alpha=0.000001)
# Stochastic descent (batchSize = 1): unstable at this learning rate
runExp(orig_data, theta, 1, STOP_ITER, thresh=5000, alpha=0.01)
# Retry with a much smaller learning rate
runExp(orig_data, theta, 1, STOP_ITER, thresh=5000, alpha=0.0000002)
# Mini-batch descent (16 samples per update)
runExp(orig_data, theta, 16, STOP_ITER, thresh=15000, alpha=0.001)
# When the cost fluctuates heavily, standardize the features (zero mean, unit variance)
from sklearn import preprocessing as pp

scaled_data = orig_data.copy()
scaled_data[:, 1:3] = pp.scale(orig_data[:, 1:3])
theta = runExp(scaled_data, theta, n, STOP_ITER, thresh=5000, alpha=0.001)  # keep the fitted theta for the accuracy check
# Accuracy
def predict(X, theta):
    # Apply the 0.5 threshold to the predicted probabilities
    return [1 if x >= 0.5 else 0 for x in model(X, theta)]

scaled_X = scaled_data[:, :3]
y = scaled_data[:, 3]
predictions = predict(scaled_X, theta)
correct = [1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0 for
           (a, b) in zip(predictions, y)]
accuracy = 100 * sum(correct) / len(correct)
print('accuracy = {0:.0f}%'.format(accuracy))
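
As a sanity check, the hand-rolled result can be compared against scikit-learn's built-in solver. This is a minimal sketch, assuming scikit-learn is available (it is already imported above for preprocessing); LogisticRegression uses its own regularized solver, so its accuracy will be close to, but not necessarily identical to, the figure printed above.

# Cross-check with sklearn's LogisticRegression (its own solver, not the descent routine above)
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()
# Skip the bias column: sklearn fits its own intercept
clf.fit(scaled_data[:, 1:3], scaled_data[:, 3])
print('sklearn accuracy: {:.0f}%'.format(100 * clf.score(scaled_data[:, 1:3], scaled_data[:, 3])))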

 
