本文不涉及逻辑回归的具体原理,只通过python代码实现算法,并且没有用到机器学习库,根据算法流程一步一步实现。
逻辑回归
数据准备
文件中每条数据有两个属性和一个标签
# Data preparation
def loadData(path='data.txt'):
    """Load the training samples from a whitespace-separated text file.

    Each non-blank line must hold two feature values followed by an
    integer class label.

    Args:
        path: File to read. Defaults to ``'data.txt'``.

    Returns:
        A tuple ``(x, y)``. ``x`` is a list of ``[1.0, f1, f2]`` rows, where
        the leading 1.0 is a constant input so the first weight acts as the
        bias term. ``y`` is the list of integer labels.

    Raises:
        OSError: If the file cannot be opened.
        ValueError, IndexError: If a non-blank line is malformed.
    """
    x = []
    y = []
    # `with` guarantees the handle is closed even if parsing raises.
    with open(path) as fr:
        for line in fr:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines, e.g. a trailing newline
            x.append([1.0, float(fields[0]), float(fields[1])])
            y.append(int(fields[2]))
    return x, y
sigmoid函数
# Sigmoid function
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    The value is computed as exp(-log(1 + exp(-x))) through
    ``np.logaddexp`` so that large negative inputs do not overflow
    ``np.exp`` (the naive form emits an overflow RuntimeWarning there).

    Args:
        x: A scalar or NumPy array/matrix.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x``.
    """
    # log(1 + exp(-x)) == logaddexp(0, -x), evaluated without overflow.
    return np.exp(-np.logaddexp(0, -x))
权重更新
采用梯度上升算法来更新权重(代码中按负对数似然做梯度下降,与对对数似然做梯度上升等价)。首先是批梯度上升算法,每次迭代一次性使用全部的数据
# Batch gradient update of the weights
def GA(x, y, alpha=0.001, epochs=500):
    """Fit logistic-regression weights using the whole data set per step.

    Every epoch applies ``w <- w - alpha * X^T (sigmoid(X w) - y)``. This is
    gradient descent on the negative log-likelihood, which is the same
    update as gradient ascent on the log-likelihood.

    Args:
        x: Samples, one row per sample (a 1-D input is treated as one row).
        y: One label per sample.
        alpha: Learning rate.
        epochs: Number of full-batch updates.

    Returns:
        The learned weights as an ``(n, 1)`` ``np.matrix``.
    """
    # Plain ndarrays are used internally because ``np.mat`` is no longer
    # available in NumPy 2.0; the result is wrapped back into a matrix on
    # return so callers see the same type as before.
    features = np.atleast_2d(np.asarray(x, dtype=float))
    labels = np.asarray(y, dtype=float).reshape(-1, 1)
    _, n = features.shape
    weights = np.ones((n, 1))
    for _ in range(epochs):
        h = sigmoid(features @ weights)  # matrix product, then squash
        error = h - labels  # residual
        weights = weights - alpha * (features.T @ error)
    return np.asmatrix(weights)
也可使用随机梯度上升算法,一次使用一个数据
# Stochastic gradient update of the weights
def SGA(x, y, alpha=0.01):
    """Fit logistic-regression weights with one pass of per-sample updates.

    The samples are visited once, in order. Each one applies
    ``w <- w - alpha * (sigmoid(x_i . w) - y_i) * x_i``.

    Args:
        x: Samples, one row per sample.
        y: One label per sample, indexable by position.
        alpha: Learning rate.

    Returns:
        The learned weights as a 1-D array of length ``n``.
    """
    x = np.array(x)
    m, n = np.shape(x)
    weights = np.ones(n)  # initial weights
    for i, sample in enumerate(x):
        h = sigmoid(np.dot(sample, weights))
        error = h - y[i]
        weights = weights - alpha * error * sample
    return weights
上面两种方式的学习率都是固定的,也可以让学习率随迭代变化
# Improved stochastic gradient update
def SGA1(x, y, numIter=150):
    """Fit weights with shuffled per-sample updates and a decaying step.

    Each of the ``numIter`` passes visits every sample exactly once in a
    random order. The learning rate shrinks as ``4 / (1 + j + i) + 0.0001``,
    where ``j`` is the pass number and ``i`` the step within the pass; the
    constant term keeps it from reaching zero.

    Args:
        x: Samples, one row per sample.
        y: One label per sample, indexable by position.
        numIter: Number of passes over the data.

    Returns:
        The learned weights as a 1-D array of length ``n``.
    """
    x = np.array(x)
    m, n = np.shape(x)
    weights = np.ones(n)
    for j in range(numIter):
        dataIndex = list(range(m))  # rows not yet visited in this pass
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.0001
            randIndex = int(np.random.uniform(0, len(dataIndex)))
            # randIndex is a position in the pool, not a row number. Pop the
            # row number stored there so each row is used once per pass.
            sampleIndex = dataIndex.pop(randIndex)
            h = sigmoid(np.dot(x[sampleIndex], weights))
            error = h - y[sampleIndex]
            weights = weights - alpha * error * x[sampleIndex]
    return weights
分类
输入数据完成分类任务
# Classification
def classify(x, weights):
    """Predict the class of one sample.

    Args:
        x: The sample's inputs.
        weights: The weights to apply to ``x``.

    Returns:
        1 if the sigmoid of the summed ``x * weights`` exceeds 0.5,
        otherwise 0.
    """
    probability = sigmoid(sum(x * weights))
    return 1 if probability > 0.5 else 0
绘制决策边界
# Plotting
def show(weights, x_train, y_train):
    """Scatter the training samples by class and draw the decision line.

    Columns 1 and 2 of ``x_train`` are used as the plot coordinates.
    Samples labelled 1 are drawn as red squares and all others as green
    dots. The line drawn is where ``w0 + w1 * x1 + w2 * x2 == 0``, sampled
    for ``x1`` from -3.0 up to 3.0 in steps of 0.1.

    Args:
        weights: The three weights ``w0, w1, w2``.
        x_train: Training samples, one row per sample.
        y_train: One label per sample, indexable by position.
    """
    samples = np.array(x_train)
    count = np.shape(samples)[0]

    # Split the point coordinates by class label.
    is_positive = [int(y_train[i]) == 1 for i in range(count)]
    pos_x1 = [samples[i, 1] for i in range(count) if is_positive[i]]
    pos_x2 = [samples[i, 2] for i in range(count) if is_positive[i]]
    neg_x1 = [samples[i, 1] for i in range(count) if not is_positive[i]]
    neg_x2 = [samples[i, 2] for i in range(count) if not is_positive[i]]

    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.scatter(pos_x1, pos_x2, s=30, c='red', marker='s')
    axes.scatter(neg_x1, neg_x2, s=30, c='green')

    # Solve w0 + w1 * x1 + w2 * x2 = 0 for x2 to get the boundary.
    line_x1 = np.arange(-3.0, 3.0, 0.1)
    w = np.array(weights)
    line_x2 = (-w[0] - w[1] * line_x1) / w[2]
    axes.plot(line_x1, line_x2)

    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()