Logistic Regression Classifier

'''
Created on 2017-12-29

@author: XiongYan
'''
# Logistic regression classifier:
import os
import numpy as np
import matplotlib.pyplot as plt
# Create the dataset (only needs to be run once; writes data.txt):
def create_dataset():
    nums = 300
    X0 = np.ones(nums)                         # bias column
    X1 = np.linspace(-8, 10, nums)
    X2 = 5./3.*X1 + 5.                         # true boundary: x2 = 5/3*x1 + 5
    error = np.random.normal(1, 12, (nums,))   # Gaussian noise around the boundary
    _X2_ = X2 + error
    label = np.zeros(nums, dtype=np.int32)
    for i in range(len(error)):
        if error[i] > 0:                       # points above the boundary are positive
            label[i] = 1
    # column_stack stacks the 1-D arrays as columns (same as hstack after reshape(-1, 1))
    data = np.column_stack((X0, X1, _X2_))
    # each line of data.txt has the form: 1.0,<x1>,<x2>,<label>
    with open('data.txt', 'w') as fw:
        for i in range(len(data)):
            fw.write(','.join(map(str, data[i])) + ',' + str(label[i]))
            if i < len(data)-1:
                fw.write('\n')
# Load the data:
def loadDataSet():
    dataMat = []
    labelMat = []
    with open('data.txt') as fr:    # close the file even if parsing fails
        for line in fr:
            lineArr = line.strip().split(',')
            dataMat.append([1.0, float(lineArr[1]), float(lineArr[2])])
            labelMat.append(int(lineArr[3]))
    return np.array(dataMat), labelMat
# Sigmoid (logistic) activation function:
def logistic(wTx):
    return 1.0/(1.0+np.exp(-wTx))
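
# The plain form above can overflow inside np.exp for large negative wTx (the value
# still saturates to the correct limit 0, but NumPy emits an overflow RuntimeWarning).
# A numerically stable variant (a sketch you could swap in; not part of the original):
def logistic_stable(wTx):
    # exp is only ever evaluated at non-positive arguments, so it cannot overflow:
    # for x >= 0 this gives 1/(1+exp(-x)); for x < 0 it gives exp(x)/(1+exp(x))
    wTx = np.asarray(wTx, dtype=np.float64)
    e = np.exp(-np.abs(wTx))
    return np.where(wTx >= 0, 1.0/(1.0 + e), e/(1.0 + e))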
# Batch gradient ascent (uses the whole dataset for every weight update)
def gradient_ascent(dataMatIn, classLabels):
    dataMat = np.mat(dataMatIn)       # m x n design matrix
    labelMat = np.mat(classLabels).T  # m x 1 column of labels
    m, n = np.shape(dataMat)
    alpha = 0.001                     # step size (learning rate)
    steps = 500                       # number of iterations
    weights = np.ones((n, 1))         # initialize the weight vector
    for k in range(steps):
        output = logistic(dataMat*weights)   # predicted probabilities, m x 1
        errors = labelMat - output           # y - p
        weights = weights + alpha*dataMat.T*errors
    return weights
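
# Why this step is gradient *ascent*: for labels y in {0, 1} the log-likelihood is
#   l(w) = sum_i [ y_i*log(sigmoid(x_i^T w)) + (1 - y_i)*log(1 - sigmoid(x_i^T w)) ]
# and its gradient simplifies to
#   grad_w l(w) = X^T (y - sigmoid(X w)) = dataMat.T * errors,
# so the update above climbs the likelihood surface with step size alpha.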

# Stochastic gradient ascent (uses a single instance per weight update)
def stochastic_gradient_ascent(dataMatIn, classLabels):
    dataMatIn = np.array(dataMatIn)
    labelMat = np.array(classLabels)
    alpha = 0.5    # step size (learning rate)
    weights = np.ones((dataMatIn.shape[1],))
    for index, instance in enumerate(dataMatIn):   # a single pass over the data
        predict = logistic(np.sum(instance*weights))
        error = labelMat[index] - predict
        weights += alpha*instance*error
    return weights
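
# A single pass over 300 instances with a fixed alpha of 0.5 is noisy and need not
# converge. A sketch of running several epochs with a smaller step size (the function
# name and defaults here are my own, not from the original):
def sgd_epochs(data, target, epochs=50, alpha=0.01):
    data = np.array(data)
    target = np.array(target)
    weights = np.ones(data.shape[1])
    for _ in range(epochs):
        for x, y in zip(data, target):
            weights += alpha*(y - logistic(np.dot(x, weights)))*x
    return weights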

# Same single-pass update as stochastic_gradient_ascent, kept as a variant with a
# much smaller learning rate (not used in the comparison below):
def stochastic_gradient_ascent1(data, target):
    data = np.array(data)
    target = np.array(target)
    alpha = 0.01   # step size (learning rate)
    weights = np.ones((data.shape[1],))
    for index, instance in enumerate(data):
        predict = logistic(np.sum(instance*weights))
        error = target[index] - predict
        weights += alpha*instance*error
    return weights
# Improved stochastic gradient ascent (random instance order and a decaying learning rate)
def improved_stochastic_gradient_ascent(dataMatIn, classLabels):
    dataMatIn = np.array(dataMatIn)
    labelMat = np.array(classLabels)
    steps = 500
    weights = np.ones(dataMatIn.shape[1])
    for i in range(steps):
        # visit the instances in a fresh random order on every pass
        iteration_index = np.random.permutation(dataMatIn.shape[0])
        for j, index in enumerate(iteration_index):
            # decay alpha with the update count (i, j), not the random sample index;
            # the 0.05 floor keeps later updates from vanishing entirely
            alpha = 2./(1.+i+j) + 0.05
            predict = logistic(np.sum(dataMatIn[index]*weights))
            error = labelMat[index] - predict
            weights += alpha*error*dataMatIn[index]
    return weights
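
# None of the trainers above actually classifies anything. A hypothetical helper
# (not in the original) that thresholds the predicted probability at 0.5,
# i.e. predicts 1 exactly when w^T x > 0:
def classify(instance, weights):
    w = np.asarray(weights).flatten()    # accepts both 1-D arrays and (n, 1) vectors
    prob = logistic(np.dot(np.asarray(instance).flatten(), w))
    return 1 if prob > 0.5 else 0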

def plotBestFit(weight1, weight2, weight3):
    data, target = loadDataSet()
    xcord1 = []    # x1 of negative instances
    ycord1 = []    # x2 of negative instances
    xcord2 = []    # x1 of positive instances
    ycord2 = []    # x2 of positive instances
    for index, instance in enumerate(data):
        if target[index] == 0:
            xcord1.append(instance[1])
            ycord1.append(instance[2])
        else:
            xcord2.append(instance[1])
            ycord2.append(instance[2])
    plt.figure(figsize=(8, 6))    # set the figure size
    plt.suptitle('logistic_regression_classifier')
    plt.subplot(221)    # first panel of a 2x2 grid; same as plt.subplot(2, 2, 1)
    plt.title('target')    # the true boundary the data was generated from
    plt.scatter(xcord1, ycord1, color='g', marker='*', label='negative')
    plt.scatter(xcord2, ycord2, color='b', marker='x', label='positive')
    plt.legend()    # legend entries come from the scatter labels
    X1 = np.linspace(-8, 10, 100)    # 100 evenly spaced points from -8 to 10
    X2 = 5./3.*X1 + 5.0
    plt.plot(X1, X2, color='r')

    # Learned boundaries: w0 + w1*x1 + w2*x2 = 0  =>  x2 = -(w0 + w1*x1)/w2
    plt.subplot(222)
    plt.title('gradient_ascent')
    plt.scatter(xcord1, ycord1, color='g', marker='*', label='negative')
    plt.scatter(xcord2, ycord2, color='b', marker='x', label='positive')
    plt.legend()
    X1 = np.linspace(-8, 10, 100)
    X2 = (-weight1[0, 0] - weight1[1, 0]*X1)/weight1[2, 0]    # weight1 is (n, 1)
    plt.plot(X1, X2, color='r')

    plt.subplot(223)
    plt.title('stochastic_gradient_ascent')
    plt.scatter(xcord1, ycord1, color='g', marker='*', label='negative')
    plt.scatter(xcord2, ycord2, color='b', marker='x', label='positive')
    plt.legend()
    X1 = np.linspace(-8, 10, 100)
    X2 = (-weight2[0] - weight2[1]*X1)/weight2[2]
    plt.plot(X1, X2, color='r')

    plt.subplot(224)
    plt.title('improved_stochastic_gradient_ascent')
    plt.scatter(xcord1, ycord1, color='g', marker='*', label='negative')
    plt.scatter(xcord2, ycord2, color='b', marker='x', label='positive')
    plt.legend()
    X1 = np.linspace(-8, 10, 100)
    X2 = (-weight3[0] - weight3[1]*X1)/weight3[2]
    plt.plot(X1, X2, color='r')
    plt.show()

# Generate the dataset on first run, then train all three models and plot:
if not os.path.exists('data.txt'):
    create_dataset()
data, labels = loadDataSet()
result1 = gradient_ascent(data, labels)
result2 = stochastic_gradient_ascent(data, labels)
result3 = improved_stochastic_gradient_ascent(data, labels)
plotBestFit(result1, result2, result3)
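
# To compare the three fits with a number rather than by eye, a sketch using the
# hypothetical classify helper defined above (training accuracy only; this script
# has no held-out test set):
for name, w in [('gradient_ascent', result1),
                ('stochastic_gradient_ascent', result2),
                ('improved_stochastic_gradient_ascent', result3)]:
    preds = np.array([classify(x, w) for x in data])
    print('%s training accuracy: %.3f' % (name, np.mean(preds == np.array(labels))))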
