深度学习花书 笔记2-二项逻辑回归模型推导与编写代码(Binomial logistic regression model)
1. 原理推导
2. 逻辑回归二分类代码
首先随机创建一些数据
'''
Created on Apr 5, 2020
Author: ***
'''
import os, sys
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# Generate a linearly separable two-class toy data set and save it to data.txt.
# Class 0: uniform in [1, 10] x [1, 10]; class 1: uniform in [11, 20] x [11, 20].
n = 200           # total number of samples; n // 2 per class
half = n // 2     # fix: original hard-coded 100 and ignored n
classes = [0, 1]
x1 = np.random.uniform(1, 10, half)
y1 = np.random.uniform(1, 10, half)
x2 = np.random.uniform(11, 20, half)
y2 = np.random.uniform(11, 20, half)
# Write "x<TAB>y<TAB>label" lines; context manager guarantees the file is closed.
with open('data.txt', 'w') as fw:
    for xa, ya in zip(x1, y1):
        fw.write("%f\t%f\t%d\n" % (xa, ya, classes[0]))
    for xb, yb in zip(x2, y2):
        fw.write("%f\t%f\t%d\n" % (xb, yb, classes[1]))
# Visualize the two classes: green circles vs. red triangles.
figure = plt.figure()
ax = figure.add_subplot(111)
ax.scatter(x1, y1, marker='o', s=80, c='green')
ax.scatter(x2, y2, marker='^', s=80, c='red')
plt.show()
数据显示,如下图, 共两类数据
下面是具体的编程实现逻辑回归:
'''
Created on Apr 5, 2020
Author: ****
'''
import os, sys
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
def LoadDataSet(fileName, delim='\t'):
    """Load a delimiter-separated numeric data file into a numpy matrix.

    Each line of the file is split on `delim` and every token converted to
    float, so a file of "x<TAB>y<TAB>label" lines yields an (n, 3) matrix.

    fileName -- path to the text file to read
    delim    -- token separator (default tab, matching the writer script)

    Fix: the original left the file handle open; `with` closes it even if
    a parse error is raised mid-file.
    """
    with open(fileName) as fr:
        datArr = [[float(tok) for tok in line.strip().split(delim)]
                  for line in fr]
    return np.mat(datArr)
def BuildMat(data):
    """Build the design matrix: replace the label column with a bias of 1.

    Input rows are [x, y, label]; output rows are [x, y, 1], so a dot
    product with the weight vector [w1, w2, b] computes w1*x + w2*y + b.
    (Assumes 3-column input, as produced by LoadDataSet.)
    """
    design = np.zeros_like(data)
    design[:, :-1] = data[:, :-1]  # copy the feature columns unchanged
    design[:, -1] = 1              # constant bias term in the last column
    return design
def InitParameter(data):
    """Split raw data into labels, initial weights, and the design matrix.

    data -- (n, 3) matrix of [x, y, label] rows

    Returns (labels, weights, data_build):
      labels     -- (n, 1) column of class labels (last column of data)
      weights    -- (3, 1) all-ones initial weight vector [w1, w2, b]
      data_build -- (n, 3) design matrix with the bias column set to 1

    Fix: the original unpacked `rows, cols = data.shape` but never used
    `rows`; only the column count is needed.
    """
    labels = data[:, -1]            # class label is the last column
    cols = data.shape[1]
    weights = np.ones((cols, 1))    # one weight per design-matrix column
    data_build = BuildMat(data)
    return labels, weights, data_build
def LogisticRegression(F):
    """Logistic sigmoid 1 / (1 + exp(-F)), applied element-wise.

    Fix: for large |F| the original raised overflow warnings in np.exp.
    Clipping F to [-500, 500] is numerically identical in float64
    (sigmoid saturates to 0/1 far earlier) but keeps exp() in range.
    """
    F = np.clip(F, -500, 500)
    return 1.0 / (1.0 + np.exp(-F))
def Train(data, labels, weights, epoch, learning_rate):
    """Fit logistic-regression weights by batch gradient ascent.

    data          -- (n, 3) design matrix (rows [x, y, 1])
    labels        -- (n, 1) target labels in {0, 1}
    weights       -- (3, 1) initial weight vector [w1, w2, b]
    epoch         -- number of full-batch update steps
    learning_rate -- step size for each update

    Returns the final (3, 1) weight matrix.

    Fixes: the original re-wrapped `weights` with np.mat on every
    iteration (hoisted to one conversion) and accumulated a
    `weights_list` that was never used (removed).
    """
    weights = np.mat(weights)
    for _ in range(epoch):
        output = LogisticRegression(data * weights)   # predicted P(y=1)
        error = labels - output
        # Gradient of the log-likelihood w.r.t. weights is data.T * error.
        weights = weights + learning_rate * data.T * error
    return weights
def Classifier(test_data, weights):
    """Predict the class (0 or 1) of one augmented sample.

    test_data -- (1, 3) row [x, y, 1]
    weights   -- (3, 1) weight vector [w1, w2, b]

    Returns 1 when the sigmoid of w1*x + w2*y + b exceeds 0.5, else 0.
    """
    score = np.sum(test_data * weights)  # scalar wx + b
    prob = LogisticRegression(score)
    return 1 if prob > 0.5 else 0
def DisplayData(data):
    """Scatter-plot the samples: class 0 as green circles, class 1 as red triangles.

    data -- (n, 3) matrix of [x, y, label] rows
    """
    fig = plt.figure()
    axes = fig.add_subplot(111)
    for row in range(len(data)):
        px, py, label = data[row, 0], data[row, 1], data[row, 2]
        if label == 0:
            axes.scatter(px, py, marker='o', s=80, c='green')
        elif label == 1:
            axes.scatter(px, py, marker='^', s=80, c='red')
if __name__ == '__main__':
    # 1. Load the generated data set.
    data = LoadDataSet('data.txt')
    # 2. Plot the raw points.
    DisplayData(data)
    # 3. Labels, initial weights, and bias-augmented design matrix.
    labels, weights, data_build = InitParameter(data)
    # 4. Train by gradient ascent.
    weights = Train(data_build, labels, weights, epoch=1000, learning_rate=0.001)
    # 5. Plot the decision boundary. Columns are [x, y, bias], so the
    #    boundary w1*x + w2*y + b = 0 gives y = -(b + w1*x) / w2.
    #    BUG FIX: the original computed -(w[2] + X*w[1]) / w[0], swapping the
    #    x- and y-coefficients; it only looked right because the symmetric
    #    synthetic data makes w1 ~= w2.
    weights = np.array(weights)
    X = np.linspace(-5, 25, 301)
    Y = -(weights[2] + X * weights[0]) / weights[1]
    plt.plot(X, Y)
    print(weights)
    # 6. Classify two test points; triangles mark class 1, circles class 0.
    #    (De-duplicated the two copy-pasted plotting sections.)
    for point in ([11, 11, 1.0], [5, 11, 0.0]):
        testdata = BuildMat(np.mat(point))
        predicted = Classifier(testdata, weights)
        print(predicted)
        marker = '^' if predicted == 1 else 'o'
        plt.plot(testdata[0, 0], testdata[0, 1], marker,
                 color='black', markersize=10)
    plt.show()
测试结果如下, 如果分类为第一类,则标记为黑色的三角形,如果为第0类,则标记为黑色的圆形。