# 学习打卡不知道第多少天
# 神经网络基础篇-逻辑斯蒂回归(对数几率回归)
import numpy as np
import matplotlib.pyplot as plt
# 数据准备
def loadDataSet(path=r'D:\Users\Lenovo\PycharmProjects\MachineLearning\Datasource\NeuralNetwork\testSet.txt'):
    """Load a whitespace-separated data set of two features and one label.

    Each non-blank line must contain at least three fields: the first two
    are parsed as floats (the features) and the third as an int (the label).

    Args:
        path: Location of the text file. Defaults to the path that was
            previously hard-coded in this function.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows, where the leading 1.0 is the constant
        input that pairs with the bias weight. ``labelMat`` is the list
        of integer labels, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even when a line fails to parse.
    with open(path) as file:
        for line in file:
            lineArr = line.strip().split()
            if not lineArr:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            # The first entry is fixed at 1.0 so the bias is learned as weight 0.
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat
def plotDataClass(weights):
    """Scatter-plot the data set by class and draw the decision boundary.

    The samples are read with ``loadDataSet()``. Samples labelled 1 are
    drawn as red squares, all others as green markers, and the line
    described by ``weights`` is drawn on top. The figure is then shown
    with ``plt.show()``.

    Args:
        weights: Indexable holding at least three entries: the bias
            weight followed by the weights of the two features.
    """
    samples, labels = loadDataSet()
    points = np.array(samples)

    # Columns 1 and 2 are the two features (column 0 is the constant bias
    # input); split them into positive (label 1) and negative groups.
    positive_x, positive_y = [], []
    negative_x, negative_y = [], []
    for point, label in zip(points, labels):
        if int(label) == 1:
            bucket_x, bucket_y = positive_x, positive_y
        else:
            bucket_x, bucket_y = negative_x, negative_y
        bucket_x.append(point[1])
        bucket_y.append(point[2])

    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.scatter(positive_x, positive_y, s=20, c='red', marker='s', alpha=.5)
    axes.scatter(negative_x, negative_y, s=20, c='green', alpha=.5)

    # Decision boundary: w0 + w1*x1 + w2*x2 = 0, hence
    # x2 = (-w0 - w1*x1) / w2, evaluated over x1 in [-5, 5).
    boundary_x = np.arange(-5.0, 5.0, 0.1)
    bias, w1, w2 = weights[0], weights[1], weights[2]
    boundary_y = (-bias - w1 * boundary_x) / w2
    axes.plot(boundary_x, boundary_y)

    plt.title('BestFit')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
# sigmoid函数,将一个实数映射到0-1之间
def sigmoid(inX):
    """Return the logistic function ``1 / (1 + exp(-inX))``.

    Squashes real values into the range 0 to 1. ``inX`` is passed to
    ``np.exp``, so scalars and NumPy arrays are handled element-wise.
    """
    decay = np.exp(-inX)
    return 1.0 / (1 + decay)
# 梯度上升法 迭代公式(矢量化形式):θ = θ + α·X^T·(y - g(Xθ))
# 使用梯度上升计算极大似然函数,最大化极大似然,也就相当于最小化损失函数,从而获得最优解
def gradAscent(dataMat, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights by batch gradient ascent.

    Starting from all-ones weights, repeats the vectorised update
    ``theta = theta + alpha * X.T @ (y - sigmoid(X @ theta))`` for a
    fixed number of iterations. Plain ndarrays and the ``@`` operator
    are used instead of ``np.mat``, which no longer exists in the NumPy
    namespace as of NumPy 2.0.

    Args:
        dataMat: Feature rows, one sample per row (m rows, n columns).
            A single flat row is treated as one sample.
        classLabels: The m labels, one per sample.
        alpha: Step size (learning rate).
        maxCycles: Number of update iterations to run.

    Returns:
        The fitted weights as an ndarray of shape (n, 1).
    """
    # atleast_2d keeps a single flat sample as one row.
    features = np.atleast_2d(np.asarray(dataMat, dtype=float))
    # Labels become a column vector so they line up with the predictions.
    targets = np.asarray(classLabels, dtype=float).reshape(-1, 1)

    n = features.shape[1]
    weight = np.ones((n, 1))

    # alpha * X.T is the same on every iteration, so compute it once.
    scaled_transpose = alpha * features.T

    for _ in range(maxCycles):
        # Predicted probability of class 1 for every sample.
        h = sigmoid(features @ weight)
        # Residual between the labels and the predictions.
        error = targets - h
        # Step along the gradient of the log-likelihood.
        weight = weight + scaled_transpose @ error
    return weight
if __name__ == '__main__':
    # Load the samples, fit the weights by gradient ascent, then print the
    # weights and plot the resulting decision boundary.
    samples, labels = loadDataSet()
    fitted = gradAscent(samples, labels)
    print(fitted)
    plotDataClass(fitted)