'''
Created on 2017-12-29
@author: XiongYan
'''
# Logistic regression classifier
import numpy as np
import matplotlib.pyplot as plt
# Create the data set (only needs to be called once):
def create_dataset():
    nums = 300
    X0 = np.ones(nums)                        # bias column
    X1 = np.linspace(-8, 10, nums)
    # np.random.shuffle(X1)
    X2 = 5./3.*X1 + 5.                        # true boundary: x2 = 5/3*x1 + 5
    error = np.random.normal(1, 12, (nums,))  # Gaussian noise added to x2
    _X2_ = X2 + error
    label = np.zeros(nums, dtype=np.int32)
    for i in range(len(error)):
        if error[i] > 0:                      # points above the line are labeled 1
            label[i] = 1
    # equivalent: np.hstack((X0.reshape(-1, 1), X1.reshape(-1, 1), _X2_.reshape(-1, 1)))
    data = np.column_stack((X0, X1, _X2_))
    with open('data.txt', 'w') as fw:
        for i in range(len(data)):
            fw.write(','.join(map(str, data[i])) + ',' + str(label[i]))
            if i < len(data) - 1:
                fw.write('\n')
#create_dataset()  # run once to generate data.txt
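# Note (added): a minimal alternative sketch using np.savetxt, assuming the same
# x0,x1,x2,label column layout as create_dataset() above. Not part of the
# original script and never called below.
def create_dataset_savetxt(path='data.txt'):
    nums = 300
    X0 = np.ones(nums)
    X1 = np.linspace(-8, 10, nums)
    error = np.random.normal(1, 12, (nums,))
    _X2_ = 5./3.*X1 + 5. + error
    label = (error > 0).astype(np.int32)
    # one comma-separated row per sample; '%d' keeps the label readable by int()
    np.savetxt(path, np.column_stack((X0, X1, _X2_, label)),
               delimiter=',', fmt=['%f', '%f', '%f', '%d'])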
# Load the data:
def loadDataSet():
    dataMat = []
    labelMat = []
    fr = open('data.txt')
    for line in fr.readlines():
        lineArr = line.strip().split(',')
        dataMat.append([1.0, float(lineArr[1]), float(lineArr[2])])
        labelMat.append(int(lineArr[3]))
    fr.close()
    dataMat = np.array(dataMat)
    return dataMat, labelMat
dataMat, labelMat = loadDataSet()
# Activation (sigmoid) function:
def logistic(wTx):
    return 1.0/(1.0+np.exp(-wTx))
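# Note (added): for large negative wTx, np.exp(-wTx) overflows and NumPy emits a
# RuntimeWarning. A numerically stable sketch (my addition, not used below):
def logistic_stable(wTx):
    x = np.asarray(wTx, dtype=np.float64)
    out = np.empty_like(x)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))  # safe: exponent is <= 0
    ex = np.exp(x[~pos])                      # safe: exponent is < 0
    out[~pos] = ex / (1.0 + ex)
    return out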
# Batch gradient ascent (uses the whole data set for every update of weights)
def gradient_ascent(dataMatIn, classLabels):
    dataMatIn = np.mat(dataMatIn)
    labelMat = np.mat(classLabels)
    m, n = np.shape(dataMatIn)
    alpha = 0.001                      # learning rate (step size)
    steps = 500                        # number of iterations
    weights = np.mat(np.ones((n, 1)))  # initialize the weight vector
    for k in range(steps):
        output = logistic(dataMatIn * weights)  # predictions sigmoid(Xw)
        errors = labelMat.T - output            # error vector y - sigmoid(Xw)
        weights = weights + alpha * dataMatIn.T * errors
    return weights
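# Note (added): the loop above implements the gradient-ascent update
#     w <- w + alpha * X^T (y - sigmoid(Xw)),
# which follows the gradient of the log-likelihood. A small helper to monitor
# convergence (my addition, not called by the original code):
def log_likelihood(dataMatIn, classLabels, weights):
    X = np.mat(dataMatIn)
    y = np.mat(classLabels).T
    p = logistic(X * np.mat(weights))  # predicted probabilities
    eps = 1e-12                        # avoid log(0)
    return float(np.sum(np.multiply(y, np.log(p + eps))
                        + np.multiply(1 - y, np.log(1 - p + eps))))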
# Stochastic gradient ascent (updates weights with one instance at a time)
def stochastic_gradient_ascent(dataMatIn, classLabels):
    dataMatIn = np.array(dataMatIn)
    labelMat = np.array(classLabels)
    alpha = 0.5  # learning rate (step size)
    weights = np.ones((dataMatIn.shape[1],))
    for index, instance in enumerate(dataMatIn):
        predict = logistic(np.sum(instance*weights))
        error = labelMat[index] - predict
        weights += alpha*instance*error
    return weights
# Same algorithm with a smaller learning rate (never called below):
def stochastic_gradient_ascent1(data, target):
    data = np.array(data)
    target = np.array(target)
    alpha = 0.01  # learning rate (step size)
    weights = np.ones((data.shape[1],))
    for index, instance in enumerate(data):
        predict = logistic(np.sum(instance*weights))
        error = target[index] - predict
        weights += alpha*instance*error
    return weights
# Improved stochastic gradient ascent (random instance order and a decaying learning rate)
def improved_stochastic_gradient_ascent(dataMatIn, classLabels):
    dataMatIn = np.array(dataMatIn)
    labelMat = np.array(classLabels)
    steps = 500
    weights = np.ones(dataMatIn.shape[1])
    for i in range(steps):
        # visit the instances in a fresh random order on every pass
        iteration_index = np.random.permutation(dataMatIn.shape[0])
        for index in iteration_index:
            alpha = 2./(1.+i+index) + 0.05  # learning rate decays as training proceeds
            predict = logistic(np.sum(dataMatIn[index]*weights))
            error = labelMat[index] - predict
            weights += alpha*error*dataMatIn[index]
    return weights
#data,labels = loadDataSet()
#results = improved_stochastic_gradient_ascent(data, labels)
#print(results)
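# Note (added): a sketch of using the learned weights for prediction with the
# usual "sigmoid(w^T x) > 0.5" rule; classify() is my addition. np.ravel accepts
# both the 1-D weight arrays and the (n,1) matrix returned by gradient_ascent.
def classify(instance, weights):
    return 1 if logistic(np.sum(instance * np.ravel(weights))) > 0.5 else 0

# Example usage (commented out, mirroring the test calls above):
#data, labels = loadDataSet()
#w = improved_stochastic_gradient_ascent(data, labels)
#accuracy = np.mean([classify(x, w) == y for x, y in zip(data, labels)])
#print('training accuracy: %.3f' % accuracy)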
def plotBestFit(weight1, weight2, weight3):
    data, target = loadDataSet()
    xcord1 = []  # x1 of negative instances
    ycord1 = []  # x2 of negative instances
    xcord2 = []  # x1 of positive instances
    ycord2 = []  # x2 of positive instances
    for index, instance in enumerate(data):
        if target[index] == 0:
            xcord1.append(instance[1])
            ycord1.append(instance[2])
        else:
            xcord2.append(instance[1])
            ycord2.append(instance[2])
    plt.figure(figsize=(8, 6))  # figure size
    plt.suptitle('logistic_regression_classifier')
    plt.subplot(221)     # first panel of a 2x2 grid; equivalent to plt.subplot(2, 2, 1)
    plt.title('target')  # subplot title
    plt.scatter(xcord1, ycord1, color='g', marker='*', label='negative')
    plt.scatter(xcord2, ycord2, color='b', marker='x', label='positive')
    plt.legend()  # legend defaults to the scatter labels
    X1 = np.linspace(-8, 10, 100)  # 100 evenly spaced points from -8 to 10
    X2 = 5./3.*X1 + 5.0            # the true boundary used to generate the data
    plt.plot(X1, X2, color='r')
    plt.subplot(222)
    plt.title('gradient_ascent')
    plt.scatter(xcord1, ycord1, color='g', marker='*', label='negative')
    plt.scatter(xcord2, ycord2, color='b', marker='x', label='positive')
    plt.legend()
    X1 = np.linspace(-8, 10, 100)
    # decision boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2
    X2 = (-weight1[0, 0] - weight1[1, 0]*X1) / weight1[2, 0]
    plt.plot(X1, X2, color='r')
    plt.subplot(223)
    plt.title('stochastic_gradient_ascent')
    plt.scatter(xcord1, ycord1, color='g', marker='*', label='negative')
    plt.scatter(xcord2, ycord2, color='b', marker='x', label='positive')
    plt.legend()
    X1 = np.linspace(-8, 10, 100)
    X2 = (-weight2[0] - weight2[1]*X1) / weight2[2]
    plt.plot(X1, X2, color='r')
    plt.subplot(224)
    plt.title('improved_stochastic_gradient_ascent')
    plt.scatter(xcord1, ycord1, color='g', marker='*', label='negative')
    plt.scatter(xcord2, ycord2, color='b', marker='x', label='positive')
    plt.legend()
    X1 = np.linspace(-8, 10, 100)
    X2 = (-weight3[0] - weight3[1]*X1) / weight3[2]
    plt.plot(X1, X2, color='r')
    plt.show()
data, labels = loadDataSet()
result1 = gradient_ascent(data, labels)
result2 = stochastic_gradient_ascent(data, labels)
result3 = improved_stochastic_gradient_ascent(data, labels)
plotBestFit(result1, result2, result3)