# 逻辑斯蒂回归
import numpy as np
import random
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import datetime
# Load the dataset: each row gets a bias term 1.0 plus two features; the
# third column of the file is the integer class label.
def loadDataSet(path=r'D:\Users\Lenovo\PycharmProjects\MachineLearning\Datasource\NeuralNetwork\testSet.txt'):
    """
    Read a whitespace-separated sample file.

    :param path: dataset file where each line is "x1 x2 label". Defaults to
        the original hard-coded location so existing callers are unaffected.
    :return: (dataMat, labelMat) — dataMat is a list of [1.0, x1, x2] rows,
        labelMat is a list of int labels.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the file is closed even if a line fails to parse.
    with open(path) as file:
        for line in file:
            lineArr = line.strip().split()
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat
# Sigmoid activation function
def sigmoid(inX):
    """Map inX into the open interval (0, 1) with the logistic function.

    Works on scalars and elementwise on numpy arrays/matrices.
    """
    denom = 1.0 + np.exp(-inX)
    return 1.0 / denom
# Plot the two classes and the fitted decision boundary.
def plotBestFit(weights):
    """
    Scatter the dataset colored by class and draw the decision line
    w0 + w1*x + w2*y = 0.

    :param weights: array-like of three regression coefficients [w0, w1, w2]
    """
    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat)
    n = np.shape(dataMat)[0]
    xcord1, ycord1 = [], []  # class-1 points
    xcord2, ycord2 = [], []  # class-0 points
    for i in range(n):
        # Original wrote int(labelMat[i] == 1), wrapping the comparison;
        # compare the label directly.
        if labelMat[i] == 1:
            xcord1.append(dataArr[i, 1])
            ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1])
            ycord2.append(dataArr[i, 2])
    plt.figure()
    plt.scatter(xcord1, ycord1, s=20, c='red', marker='s', alpha=0.5)
    plt.scatter(xcord2, ycord2, s=20, c='green')
    # Decision boundary: solve w0 + w1*x + w2*y = 0 for y.
    x = np.arange(-3, 3, 0.1)
    y = (-weights[0] - weights[1] * x) / weights[2]
    plt.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.title('BestFit')
    plt.show()
# Batch gradient ascent, update rule: θ := θ + α · Xᵀ(y − g(Xθ)).
# Maximizing the log-likelihood this way is equivalent to minimizing the
# logistic loss, yielding the optimal coefficients.
def gradAscent(dataMat, classLabels):
    """
    Train logistic-regression weights with full-batch gradient ascent.

    :param dataMat: training samples, m rows of [1.0, x1, x2, ...]
    :param classLabels: m class labels (0/1)
    :return: (weights as an (n, 1) ndarray, weights_array of shape
        (maxCycles, n) holding the weights after every iteration,
        elapsed training time as a datetime.timedelta)
    """
    starttime = datetime.datetime.now()
    dataMatrix = np.mat(dataMat)                # m x n feature matrix
    labelMat = np.mat(classLabels).transpose()  # m x 1 label column
    m, n = np.shape(dataMatrix)
    alpha = 0.01     # learning rate
    maxCycles = 500  # number of full-batch iterations
    weights = np.ones((n, 1))
    # Collect snapshots in a list and convert once at the end — the original
    # np.append-per-iteration rebuilt the whole array every step.
    history = []
    for k in range(maxCycles):
        # h: predicted probabilities; error: residual y - h (the gradient signal)
        h = sigmoid(dataMatrix * weights)
        error = labelMat - h
        # Vectorized gradient-ascent step
        weights = weights + alpha * dataMatrix.transpose() * error
        history.append(np.asarray(weights).ravel())
    weights_array = np.array(history).reshape(maxCycles, n)
    finishtime = datetime.datetime.now()
    timeuse = finishtime - starttime
    return weights.getA(), weights_array, timeuse
# Improved stochastic gradient ascent: decaying learning rate, and each pass
# samples without replacement.
def stocGradAscent1(dataMatrix, classLabels, numIter=100):
    """
    Train logistic-regression weights with stochastic gradient ascent.

    :param dataMatrix: numpy array of shape (m, n); rows are samples
    :param classLabels: sequence of m class labels (0/1)
    :param numIter: number of passes over the data
    :return: (weights as an (n,) ndarray, weights_array of shape
        (numIter * m, n) with the weights after every single update,
        elapsed training time as a datetime.timedelta)
    """
    starttime = datetime.datetime.now()
    m, n = np.shape(dataMatrix)
    weights = np.ones(n)
    history = []  # one weights snapshot per update
    for j in range(numIter):
        # Indices of the samples not yet used in this pass.
        dataIndex = list(range(m))
        for i in range(m):
            # Decay alpha as training proceeds; the 0.01 floor keeps it
            # from ever reaching zero.
            alpha = 4 / (1.0 + j + i) + 0.01
            # Pick one of the *remaining* samples at random.
            randIndex = int(random.uniform(0, len(dataIndex)))
            # BUGFIX: the original indexed dataMatrix with randIndex itself,
            # so deleting from dataIndex never prevented sample reuse.
            sample = dataIndex[randIndex]
            h = sigmoid(sum(dataMatrix[sample] * weights))
            # Prediction error for this sample
            error = classLabels[sample] - h
            # Single-sample gradient-ascent update
            weights = weights + alpha * error * dataMatrix[sample]
            history.append(weights.copy())
            # Remove the used sample so each pass visits every sample once.
            del dataIndex[randIndex]
    weights_array = np.array(history).reshape(numIter * m, n)
    finishtime = datetime.datetime.now()
    timeuse = finishtime - starttime
    return weights, weights_array, timeuse
# Plot each regression coefficient against the update count, to compare the
# stochastic (left column) and full-batch (right column) optimizers.
def plotWeights(weights_array1, weights_array2):
    """
    :param weights_array1: per-update weights from stocGradAscent1, shape (k1, 3)
    :param weights_array2: per-iteration weights from gradAscent, shape (k2, 3)
    """
    # Chinese font for titles/labels (Windows-specific path).
    font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)

    def _bold(text_artist):
        # Shared styling for every title/label.
        plt.setp(text_artist, size=20, weight='bold', color='black')

    # 3 rows (w0/w1/w2) x 2 columns (stochastic / batch), independent axes.
    fig, axs = plt.subplots(nrows=3, ncols=2, sharex=False, sharey=False, figsize=(20, 10))
    titles = (u'随机梯度上升:回归系数与迭代次数关系', u'全局梯度上升:回归系数与迭代次数关系')
    for col, (arr, title) in enumerate(zip((weights_array1, weights_array2), titles)):
        x = np.arange(0, len(arr), 1)
        # NOTE: the documented Text keyword is 'fontproperties'; the original
        # passed 'FontProperties', which modern matplotlib rejects.
        _bold(axs[0][col].set_title(title, fontproperties=font))
        for row in range(3):
            axs[row][col].plot(x, arr[:, row])
            # BUGFIX: rows are w0, w1, w2 — the original labeled row 2 'W1'.
            _bold(axs[row][col].set_ylabel(u'W%d' % row, fontproperties=font))
        _bold(axs[2][col].set_xlabel(u'迭代次数', fontproperties=font))
    plt.show()
# Script entry point: train with both optimizers, print the coefficient
# histories and timings, then plot the trajectories side by side.
if __name__ == '__main__':
    features, labels = loadDataSet()
    sgd_weights, sgd_history, sgd_time = stocGradAscent1(np.array(features), labels)
    batch_weights, batch_history, batch_time = gradAscent(features, labels)
    print(sgd_history)
    print(batch_history)
    print("随机梯度上升用时:", sgd_time)
    print("全局梯度上升用时:", batch_time)
    plotWeights(sgd_history, batch_history)
# 对数几率回归 (logit regression)
# NOTE(review): the two lines below were web-page footer text fused into the
# file by the scrape ("latest recommended article published 2024-05-23
# 10:40:33"); kept as comments so the file parses.
# 最新推荐文章于 2024-05-23 10:40:33 发布