Logistic回归

最新推荐文章于 2015-07-18 09:05:28 发布

zilangboya

最新推荐文章于 2015-07-18 09:05:28 发布

阅读量434

点赞数

分类专栏：数据挖掘 ｜ 文章标签：Python、Logistic回归

本文链接：https://blog.csdn.net/u012293522/article/details/46715461

版权

数据挖掘专栏收录该内容

8 篇文章 0 订阅

订阅专栏

# -*-coding:utf-8-*-
'''
    Logistic regression algorithms
'''
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
import random

def loadDataSet():
    '''
        Load the training set from 'testSet.txt' in the current directory.

        Each non-blank line is expected to contain three whitespace-separated
        fields: two feature values followed by an integer class label.

        Returns (dataMat, labelMat):
            dataMat  -- list of [1.0, x1, x2] rows; the leading 1.0 is a
                        constant term so the first weight acts as the intercept
            labelMat -- list of integer class labels, in file order
    '''
    dataMat = []
    labelMat = []
    # The context manager closes the file even if parsing a line raises.
    with open('testSet.txt') as f:
        for line in f:
            lineArr = line.split()
            if not lineArr:
                # Skip blank lines (e.g. a trailing empty line) instead of
                # failing with IndexError.
                continue
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat

def sigmoid(inX):
    '''
        Logistic (sigmoid) function: 1 / (1 + e^(-inX)).

        inX is the linear combination W(T)*X = W1*X1 + W2*X2 + W3*X3 + ...
        The curve is a smooth stand-in for a step function, mapping any real
        input into the open interval (0, 1). NumPy arrays/matrices are
        processed element-wise.
    '''
    decay = np.exp(-inX)
    return 1.0 / (1 + decay)

def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    '''
        Batch gradient ascent: every weight update uses the whole training set.

        dataMatIn   -- m x n training data (list of rows or 2-D array)
        classLabels -- sequence of m class labels
        alpha       -- step size of each update (default 0.001)
        maxCycles   -- number of update iterations (default 500)

        Returns the n x 1 weight vector as a numpy matrix.
    '''
    # np.asmatrix replaces np.mat, which is no longer available in the main
    # namespace of NumPy 2.0; both build a matrix without copying.
    dataMatrix = np.asmatrix(dataMatIn)                 # m x n
    labelMat = np.asmatrix(classLabels).transpose()     # row of labels -> m x 1 column
    n = np.shape(dataMatrix)[1]                         # number of features
    # Start from all-ones weights; kept as a matrix so the return type does
    # not depend on how many iterations run.
    weights = np.asmatrix(np.ones((n, 1)))
    for _ in range(maxCycles):
        h = sigmoid(dataMatrix * weights)   # predictions for all m samples
        error = labelMat - h                # true label minus prediction
        # Move the weights along the gradient of the log-likelihood.
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights

def stocGradAscent0(dataMatIn,classLabels):
    '''
        Stochastic gradient ascent: a single pass over the training data in
        order, updating the weights once per sample.

        Returns the weights as a 1-D array with one entry per feature.
    '''
    samples = np.array(dataMatIn)               # training data as a 2-D array
    numSamples, numFeatures = np.shape(samples)
    stepSize = 0.01                             # fixed update step
    weights = np.ones(numFeatures)              # start from all-ones weights
    for idx, row in enumerate(samples):
        # prediction for this sample: sigmoid(x0*w0 + x1*w1 + x2*w2)
        predicted = sigmoid(sum(row * weights))
        residual = classLabels[idx] - predicted
        weights = weights + stepSize * residual * row
    return weights

def stocGradAscent1(dataMatIn, classLabels, numIter=4000):
    '''
        Improved stochastic gradient ascent with multiple passes.

        Improvements over the single-pass version:
        1. the step size alpha shrinks as the iteration count grows
        2. within each pass the samples are visited in random order, each
           exactly once (sampling without replacement)

        dataMatIn   -- m x n training data (n must be at least 3, because the
                       first three weights are recorded for plotting)
        classLabels -- sequence of m class labels
        numIter     -- number of passes over the training data

        Side effect: calls plotime() to show how the first three weights
        evolved over the passes.
        Returns the weights as a 1-D array with one entry per feature.
    '''
    dataMatrix = np.array(dataMatIn)
    m, n = np.shape(dataMatrix)
    weights = np.ones(n)
    # weight values after each pass, recorded for the plot
    x0 = []
    x1 = []
    x2 = []
    for i in range(numIter):
        # A real list is needed so entries can be removed; on Python 3 a bare
        # range() object does not support item deletion.
        remaining = list(range(m))
        for j in range(m):
            # Step size decays with i and j; the 0.01 floor keeps later
            # samples from having no influence at all.
            alpha = 4 / (1.0 + i + j) + 0.01
            # Pick a random *position* in the not-yet-used list and map it to
            # the actual sample index. randrange never returns the upper
            # bound, unlike int(random.uniform(0, len)) which may round up.
            sampleIndex = remaining.pop(random.randrange(len(remaining)))
            h = sigmoid(sum(dataMatrix[sampleIndex] * weights))  # sum(x0*w0 + x1*w1 + x2*w2)
            error = classLabels[sampleIndex] - h
            weights = weights + alpha * error * dataMatrix[sampleIndex]
        x0.append(weights[0])
        x1.append(weights[1])
        x2.append(weights[2])
    plotime(x0, x1, x2)
    return weights

def plotime(x0, x1, x2):
    '''
        Plot how the three weights change over the training passes.

        x0, x1, x2 -- equal-length sequences; element k is the value of the
                      corresponding weight after pass k+1

        Draws three stacked subplots (one per weight) and shows the figure.
    '''
    # Derive the x axis from the data so any number of passes can be plotted,
    # not only the default 4000.
    numIter = len(x0)
    x = np.arange(1, numIter + 1)
    fig = plt.figure()
    # (subplot position, weight history, y-axis limits, y-axis label)
    # The y limits are the fixed viewing windows chosen by the original code.
    panels = (
        (311, x0, (8, 15), 'X0'),
        (312, x1, (0, 3), 'X1'),
        (313, x2, (-3, 0), 'X2'),
    )
    for position, history, ylimits, label in panels:
        ax = fig.add_subplot(position)
        # Axis limits take only (low, high); the extra third "step" argument
        # previously passed is not a valid limit argument.
        ax.set_xlim(0, numIter)
        ax.set_ylim(*ylimits)
        ax.set_ylabel(label)
        ax.plot(x, history)

    plt.show()

def plotBestFit(weights):
    '''
        Plot the training data and the decision boundary.

        weights -- the three learned weights (w0, w1, w2); may be an n x 1
                   numpy matrix (as returned by gradAscent) or a 1-D array
                   (as returned by the stochastic variants)

        Reads the training set via loadDataSet(), scatters the two classes in
        different colours and draws the line w0 + w1*x1 + w2*x2 = 0.
    '''
    # Flatten to a plain 1-D array so both matrix and ndarray inputs work;
    # .getA() exists only on numpy matrices.
    weights = np.asarray(weights).ravel()
    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat)
    # Coordinates of class-1 samples and of all other samples, kept apart so
    # they can be drawn with different markers.
    xcord1 = []
    ycord1 = []
    xcord2 = []
    ycord2 = []
    for row, label in zip(dataArr, labelMat):
        if int(label) == 1:
            xcord1.append(row[1])
            ycord1.append(row[2])
        else:
            xcord2.append(row[1])
            ycord2.append(row[2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = np.arange(-3.0, 3.0, 0.1)
    # Boundary: w0 + w1*x1 + w2*x2 = 0, solved for x2.
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()

zilangboya

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
Logistic回归

# -*-coding:utf-8-*-''' 逻辑回归算法'''from __future__ import divisionimport numpy as npimport matplotlib.pyplot as pltimport randomdef loadDataSet(): ''' 训练数据 ---> 训练数据列表,分类标号向量
复制链接

扫一扫