# 逻辑斯蒂回归
import numpy as np
import random
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import datetime
# Load the dataset: each row gets a bias term 1.0 plus two features; the
# third column of the file is the integer class label.
def loadDataSet(path=r'D:\Users\Lenovo\PycharmProjects\MachineLearning\Datasource\NeuralNetwork\testSet.txt'):
    """
    Read a whitespace-separated sample file.

    :param path: dataset file where each line is "x1 x2 label". Defaults to
        the original hard-coded location so existing callers are unaffected.
    :return: (dataMat, labelMat) — dataMat is a list of [1.0, x1, x2] rows,
        labelMat is a list of int labels.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the file is closed even if a line fails to parse.
    with open(path) as file:
        for line in file:
            lineArr = line.strip().split()
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat
# Sigmoid activation function
def sigmoid(inX):
    """Map inX into the open interval (0, 1) with the logistic function.

    Works on scalars and elementwise on numpy arrays/matrices.
    """
    denom = 1.0 + np.exp(-inX)
    return 1.0 / denom
# Plot the two classes and the fitted decision boundary.
def plotBestFit(weights):
    """
    Scatter the dataset colored by class and draw the decision line
    w0 + w1*x + w2*y = 0.

    :param weights: array-like of three regression coefficients [w0, w1, w2]
    """
    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat)
    n = np.shape(dataMat)[0]
    xcord1, ycord1 = [], []  # class-1 points
    xcord2, ycord2 = [], []  # class-0 points
    for i in range(n):
        # Original wrote int(labelMat[i] == 1), wrapping the comparison;
        # compare the label directly.
        if labelMat[i] == 1:
            xcord1.append(dataArr[i, 1])
            ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1])
            ycord2.append(dataArr[i, 2])
    plt.figure()
    plt.scatter(xcord1, ycord1, s=20, c='red', marker='s', alpha=0.5)
    plt.scatter(xcord2, ycord2, s=20, c='green')
    # Decision boundary: solve w0 + w1*x + w2*y = 0 for y.
    x = np.arange(-3, 3, 0.1)
    y = (-weights[0] - weights[1] * x) / weights[2]
    plt.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.title('BestFit')
    plt.show()
# Batch gradient ascent, update rule: θ := θ + α · Xᵀ(y − g(Xθ)).
# Maximizing the log-likelihood this way is equivalent to minimizing the
# logistic loss, yielding the optimal coefficients.
def gradAscent(dataMat, classLabels):
    """
    Train logistic-regression weights with full-batch gradient ascent.

    :param dataMat: training samples, m rows of [1.0, x1, x2, ...]
    :param classLabels: m class labels (0/1)
    :return: (weights as an (n, 1) ndarray, weights_array of shape
        (maxCycles, n) holding the weights after every iteration,
        elapsed training time as a datetime.timedelta)
    """
    starttime = datetime.datetime.now()
    dataMatrix = np.mat(dataMat)                # m x n feature matrix
    labelMat = np.mat(classLabels).transpose()  # m x 1 label column
    m, n = np.shape(dataMatrix)
    alpha = 0.01     # learning rate
    maxCycles = 500  # number of full-batch iterations
    weights = np.ones((n, 1))
    # Collect snapshots in a list and convert once at the end — the original
    # np.append-per-iteration rebuilt the whole array every step.
    history = []
    for k in range(maxCycles):
        # h: predicted probabilities; error: residual y - h (the gradient signal)
        h = sigmoid(dataMatrix * weights)
        error = labelMat - h
        # Vectorized gradient-ascent step
        weights = weights + alpha * dataMatrix.transpose() * error
        history.append(np.asarray(weights).ravel())
    weights_array = np.array(history).reshape(maxCycles, n)
    finishtime = datetime.datetime.now()
    timeuse = finishtime - starttime
    return weights.getA(), weights_array, timeuse
# Improved stochastic gradient ascent: decaying learning rate, and each pass
# samples without replacement.
def stocGradAscent1(dataMatrix, classLabels, numIter=100):
    """
    Train logistic-regression weights with stochastic gradient ascent.

    :param dataMatrix: numpy array of shape (m, n); rows are samples
    :param classLabels: sequence of m class labels (0/1)
    :param numIter: number of passes over the data
    :return: (weights as an (n,) ndarray, weights_array of shape
        (numIter * m, n) with the weights after every single update,
        elapsed training time as a datetime.timedelta)
    """
    starttime = datetime.datetime.now()
    m, n = np.shape(dataMatrix)
    weights = np.ones(n)
    history = []  # one weights snapshot per update
    for j in range(numIter):
        # Indices of the samples not yet used in this pass.
        dataIndex = list(range(m))
        for i in range(m):
            # Decay alpha as training proceeds; the 0.01 floor keeps it
            # from ever reaching zero.
            alpha = 4 / (1.0 + j + i) + 0.01
            # Pick one of the *remaining* samples at random.
            randIndex = int(random.uniform(0, len(dataIndex)))
            # BUGFIX: the original indexed dataMatrix with randIndex itself,
            # so deleting from dataIndex never prevented sample reuse.
            sample = dataIndex[randIndex]
            h = sigmoid(sum(dataMatrix[sample] * weights))
            # Prediction error for this sample
            error = classLabels[sample] - h
            # Single-sample gradient-ascent update
            weights = weights + alpha * error * dataMatrix[sample]
            history.append(weights.copy())
            # Remove the used sample so each pass visits every sample once.
            del dataIndex[randIndex]
    weights_array = np.array(history).reshape(numIter * m, n)
    finishtime = datetime.datetime.now()
    timeuse = finishtime - starttime
    return weights, weights_array, timeuse
# Plot each regression coefficient against the update count, to compare the
# stochastic (left column) and full-batch (right column) optimizers.
def plotWeights(weights_array1, weights_array2):
    """
    :param weights_array1: per-update weights from stocGradAscent1, shape (k1, 3)
    :param weights_array2: per-iteration weights from gradAscent, shape (k2, 3)
    """
    # Chinese font for titles/labels (Windows-specific path).
    font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)

    def _bold(text_artist):
        # Shared styling for every title/label.
        plt.setp(text_artist, size=20, weight='bold', color='black')

    # 3 rows (w0/w1/w2) x 2 columns (stochastic / batch), independent axes.
    fig, axs = plt.subplots(nrows=3, ncols=2, sharex=False, sharey=False, figsize=(20, 10))
    titles = (u'随机梯度上升:回归系数与迭代次数关系', u'全局梯度上升:回归系数与迭代次数关系')
    for col, (arr, title) in enumerate(zip((weights_array1, weights_array2), titles)):
        x = np.arange(0, len(arr), 1)
        # NOTE: the documented Text keyword is 'fontproperties'; the original
        # passed 'FontProperties', which modern matplotlib rejects.
        _bold(axs[0][col].set_title(title, fontproperties=font))
        for row in range(3):
            axs[row][col].plot(x, arr[:, row])
            # BUGFIX: rows are w0, w1, w2 — the original labeled row 2 'W1'.
            _bold(axs[row][col].set_ylabel(u'W%d' % row, fontproperties=font))
        _bold(axs[2][col].set_xlabel(u'迭代次数', fontproperties=font))
    plt.show()
# Script entry point: train with both optimizers, print the coefficient
# histories and timings, then plot the trajectories side by side.
if __name__ == '__main__':
    features, labels = loadDataSet()
    sgd_weights, sgd_history, sgd_time = stocGradAscent1(np.array(features), labels)
    batch_weights, batch_history, batch_time = gradAscent(features, labels)
    print(sgd_history)
    print(batch_history)
    print("随机梯度上升用时:", sgd_time)
    print("全局梯度上升用时:", batch_time)
    plotWeights(sgd_history, batch_history)
# 对数几率回归 (logit regression)
# NOTE(review): the two lines below were web-page footer text fused into the
# file by the scrape ("latest recommended article published 2024-05-23
# 10:40:33"); kept as comments so the file parses.
# 最新推荐文章于 2024-05-23 10:40:33 发布