详细解释《机器学习实战》第5章logistic回归第5-1程序（梯度下降法推导）和梯度下降法实现对率回归

最新推荐文章于 2021-10-28 14:02:43 发布

渣渣张

最新推荐文章于 2021-10-28 14:02:43 发布

阅读量1.1k

点赞数 2

分类专栏：机器学习文章标签：机器学习

本文链接：https://blog.csdn.net/u014697805/article/details/77776925

版权

机器学习专栏收录该内容

12 篇文章 0 订阅

订阅专栏

从别人的参考过来的推导过程

这里写图片描述

梯度下降法实现对率回归

数据集

这里写图片描述

代码

# -*- coding: utf-8 -*-
from log import *
import pandas as pd
from pandas import *
import matplotlib.pyplot as plt
import numpy as np
from numpy import *
#读取数据到X,y
dataset = np.loadtxt(r'C:\Users\zmy\Desktop\titanic\watermelon.csv',delimiter=",")
X = dataset[0:2, :]
y = dataset[3,:]
X = X.transpose()
y = y.transpose()
df = pd.DataFrame(X, columns = ['density', 'ratio_sugar'])
m,n = shape(df.values)
df['norm'] = ones((m,1))
dataMat = array(df[['norm', 'density', 'ratio_sugar']].values[:,:])
labelMat = array(y).transpose()

def sigmoid(inX):
    return 1.0 / (1+exp(-inX))
#梯度下降法
def gradAscend(dataMatIn, classLabels):
    dataMatrix = dataMatIn
    # labelMat = mat(classLabels).transpose()
    m,n = shape(dataMatrix)
    alpha = 0.1
    maxCycle = 500
    weights = ones((n,1))
    for i in range(maxCycle):
        a = dot(dataMatrix, weights)
        # print a
        h = sigmoid(a)
        error = (labelMat - h)
        weights = weights + alpha * dataMatrix.transpose()*error
    return weights
# 随机梯度下降法
def stocGradAscend1(dataMat, labelMat, numIter =50):
    print dataMat
    print labelMat
    m, n = shape(dataMat)
    weights = ones(n)
    weights = array(weights)
    for j in range(numIter):
        dataIndex = range(m)
        for i in range(m):
            # dataIndex = range(m)
            alpha = 40 / (1.0+j+i) + 0.2
            randIndex_temp = int(random.uniform(0, len(dataIndex)))
            randIndex = dataIndex[randIndex_temp]
            h = sigmoid(sum(dataMat[randIndex]*weights))
            error = labelMat[randIndex] - h
            weights = weights + alpha * error * dataMat[randIndex]
            del(dataIndex[randIndex_temp])
    return weights

def plotBestFit(weights):
    import matplotlib.pyplot as plt
    # dataMat, labelMat = loadDataSet()
    dataArr = array(dataMat)
    n = shape(dataArr)[0]
    xcord1 = []
    ycord1 = []
    xcord2 = []
    ycord2 = []
    for i in range(n):
        if int(labelMat[i]) == 0:
            xcord2.append(dataArr[i,1]); ycord2.append(dataArr[i, 2])
        else:
            xcord1.append(dataArr[i,1]); ycord1.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    #画出散点图
    ax.scatter(xcord1, ycord1, s = 30, c = 'red', marker='s',label = '1')
    ax.scatter(xcord2, ycord2, s=30, c = 'g', label = '0')
    # 设置直线的x,y上的点
    x = arange(0.2, 0.8, 0.1)
    y = array((-weights[0] - weights[1]*x)/weights[2])
    y=y.transpose()
    ax.plot(x, y)
    plt.xlabel('density')
    plt.ylabel('ratio_sugar')
    plt.legend(loc = 'upper right')
    plt.title("random gradAscent logistic regression")
    plt.show()
# 函数调用
weights = stocGradAscend1(dataMat, labelMat)
plotBestFit(weights)