pytorch版
载入库
import torch
from torch.autograd import Variable
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
# Fix the random seed for reproducibility
torch.manual_seed(2020)
# Make newly created tensors default to double precision
torch.set_default_tensor_type("torch.DoubleTensor")
# Show the result of every expression in a Jupyter cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
使用iris数据集
# Load the iris data: X holds the four feature columns, Y the class labels.
iris = load_iris()
X = iris["data"]
Y = iris["target"]
X.shape
Y.shape
pd.Series(Y).value_counts()  # frequency of each class in Y
(150, 4)
(150,)
2 50
1 50
0 50
dtype: int64
只使用Y=0或Y=1的样本(二分类数据),只使用X的前两个变量作为预测变量
# Keep only classes 0 and 1 (binary problem) and only the first two features.
# The mask is built from the original Y before either array is overwritten.
binary_mask = Y != 2
Y, X = Y[binary_mask], X[binary_mask, 0:2]
X.shape
Y.shape
(100, 2)
(100,)
Y=0和Y=1样本点的分布
# Scatter the two features; the first 50 samples are class 0 (red),
# the remaining 50 are class 1 (blue).
point_colors = ["red" if i < 50 else "blue" for i in range(100)]
plt.scatter(X[:, 0], X[:, 1], c = point_colors)
plt.show()
<matplotlib.collections.PathCollection at 0x245c3370288>
求解logistic回归方程的系数
X和Y由numpy.array类型转换为Variable类型
# Convert X and Y from numpy arrays to torch tensors.
# (torch.autograd.Variable is deprecated since PyTorch 0.4 — plain tensors
# carry autograd information themselves, so the wrapper is unnecessary.)
x_data = torch.from_numpy(X)
# Y is integer-typed, so torch.from_numpy(Y) yields an int tensor; double()
# casts it to match x_data. unsqueeze(1) reshapes Y from [100] to [100, 1].
y_data = torch.from_numpy(Y).unsqueeze(1).double()
x_data.shape, y_data.shape
(torch.Size([100, 2]), torch.Size([100, 1]))
$$\operatorname{logit}(P) = \ln\left(\frac{p}{1-p}\right) = b + Xw$$
$X$ 的形状为 $100 \times 2$;$w$ 的形状为 $2 \times 1$;$b$ 的形状为 $[1]$,计算 $b + Xw$ 时经广播扩展为 $100 \times 1$
# Initialize the parameters to solve for: w (2x1 weight vector) and b (bias).
# requires_grad=True enables automatic differentiation for both.
# (torch.autograd.Variable is deprecated since PyTorch 0.4; tensors created
# with requires_grad=True track gradients directly.)
w = torch.randn(2, 1, requires_grad = True)
b = torch.zeros(1, requires_grad = True)
给定回归方程参数 $w$ 和 $b$ 的情况下,模型对每个样本产生的预测概率:
$$\text{predict\_probability} = \frac{e^{b + Xw}}{1 + e^{b + Xw}}$$
def pred_prob(x, weight = None, bias = None):
    """Return P(y=1|x) = sigmoid(x @ weight + bias) for each row of x.

    weight and bias default to the module-level parameters w and b, so
    existing callers (pred_prob(x_data)) are unchanged. torch.sigmoid is
    mathematically identical to exp(z)/(1+exp(z)) but does not overflow for
    large z, and the matrix product is computed only once.
    """
    if weight is None:
        weight = w
    if bias is None:
        bias = b
    return torch.sigmoid(torch.mm(x, weight) + bias)
根据对数损失函数 $loss = -\sum\big(y \ln(\hat{p}) + (1-y)\ln(1-\hat{p})\big)$(其中 $\hat{p}$ 为 predict_probability),求出给定参数 $w$ 和 $b$ 的情况下的损失值
def binary_loss(pred_prob, y):
    """Negative log-likelihood (sum-reduced binary cross-entropy).

    pred_prob: predicted probabilities in (0, 1), shape [n, 1].
    y: true labels (0.0 or 1.0), same shape.
    """
    log_likelihood = y * torch.log(pred_prob) + (1 - y) * torch.log(1 - pred_prob)
    return -log_likelihood.sum()
梯度下降法优化损失函数,求得当损失函数值最小时 $w$ 和 $b$ 的值
# 10轮批量梯度下降
# 10 epochs of full-batch gradient descent.
for epoch in range(10):
    # Forward pass: predicted probability for every sample.
    y_pred = pred_prob(x_data)
    # Negative log-likelihood for the current w and b.
    loss = binary_loss(y_pred, y_data)
    # Backward pass populates w.grad and b.grad with d(loss)/dw, d(loss)/db.
    loss.backward()
    # Gradient-descent update, learning rate 0.001.
    w.data = w.data - 0.001 * w.grad.data
    b.data = b.data - 0.001 * b.grad.data
    # Reset gradients in place; unlike assigning fresh zero tensors, this
    # does not hard-code the parameter shapes or dtypes.
    w.grad.data.zero_()
    b.grad.data.zero_()
    # Accuracy with a 0.5 threshold (y_pred was computed before this
    # epoch's update, so accuracy lags the parameters by one step).
    mask = (y_pred > 0.5).long()
    acc = (mask == y_data.long()).sum().item() / y_data.shape[0]
    print(acc)
0.5
0.5
0.86
0.99
0.99
0.99
0.99
0.99
0.99
0.99
求解出的参数 $w$ 和 $b$ 的值
w.data, b.data
(tensor([[ 0.8148],
[-1.3841]]),
tensor([-0.1052]))
画出分类线
# Decision boundary: points where b + w1*x1 + w2*x2 = 0,
# i.e. x2 = (-b - w1*x1) / w2.
x = np.arange(X[:, 0].min(), X[:, 0].max(), 0.1)
intercept = b.data[0].item()
w1_val, w2_val = w.data[0, 0].item(), w.data[1, 0].item()
y = (-intercept - w1_val * x) / w2_val
plt.plot(X[:, 0][Y == 0], X[:, 1][Y == 0], "ro", label = "0")
plt.plot(X[:, 0][Y == 1], X[:, 1][Y == 1], "bo", label = "1")
plt.plot(x, y, "g", label = "classifier curve")
plt.legend()
plt.show()
numpy版
载入库
# Imports for the pure numpy/sympy version.
import numpy as np
import pandas as pd
import random
from sympy import symbols, diff, exp, log
from sklearn.datasets import load_iris
# NOTE(review): the plotting cell below also uses plt, which is imported in
# the earlier pytorch section — add a matplotlib import if run standalone.
# Show the result of every expression in a Jupyter cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
使用iris数据集
使用Y=0或Y=1的样本点,只使用X的前两个变量作为预测变量
# Reload iris; keep only classes 0/1 and the first two features.
iris = load_iris()
X, Y = iris["data"], iris["target"]
keep = Y != 2
Y, X = Y[keep], X[keep, 0:2]
X.shape, Y.shape
((100, 2), (100,))
生成待求解参数w1_value、w2_value、b_value的初始值
# Initial values of the parameters to solve for: w1_value, w2_value, b_value.
# The seed makes the two random.random() draws reproducible; their call
# order must not change.
random.seed(2020)
w1_value = random.random()
w2_value = random.random()
b_value = 0
求解中用到sympy库求偏导的功能,以一个简单例子展示sympy求偏导的用法
x1, x2 = symbols("x1 x2")
# The function to differentiate
y = x1 * x2 + x2 ** 2
# Symbolic expression for the partial derivative of y w.r.t. x2
diff(y, x2)
# Substitute concrete values into the expression to evaluate the
# partial derivative numerically
diff(y, x2).evalf(subs = {"x1": 5, "x2": 6})
$x_{1} + 2 x_{2}$
$17.0$
求解logistic回归方程的系数
# Build the symbolic negative log-likelihood ONCE: the expression depends only
# on the data, not on the current parameter values, so rebuilding it (and
# re-differentiating it) every epoch — as the original did — is pure wasted work.
w1, w2, b = symbols("w1 w2 b")
loss = 0
# sympy has no vectorized operations, so accumulate per-sample losses in a loop.
# (Index renamed from i to j: it previously shadowed the epoch counter.)
for j in range(X.shape[0]):
    pred_prob_j = exp(b + w1 * X[j, 0] + w2 * X[j, 1]) / (1 + exp(b + w1 * X[j, 0] + w2 * X[j, 1]))
    loss = loss - (Y[j] * log(pred_prob_j) + (1 - Y[j]) * log(1 - pred_prob_j))
# Symbolic partial derivatives of the loss — also epoch-invariant.
dloss_dw1 = diff(loss, w1)
dloss_dw2 = diff(loss, w2)
dloss_db = diff(loss, b)
# 10 epochs of full-batch gradient descent.
for i in range(10):
    # Evaluate the gradients at the current parameter values.
    current = {"w1": w1_value, "w2": w2_value, "b": b_value}
    dw1 = dloss_dw1.evalf(subs = current)
    dw2 = dloss_dw2.evalf(subs = current)
    db = dloss_db.evalf(subs = current)
    # Parameter update, learning rate 0.0015.
    w1_value = w1_value - 0.0015 * dw1
    w2_value = w2_value - 0.0015 * dw2
    b_value = b_value - 0.0015 * db
    # Predicted probabilities with the just-updated parameters.
    mid_pred = (b_value + w1_value * X[:, 0] + w2_value * X[:, 1]).astype("float32")
    y_pred = np.exp(mid_pred) / (1 + np.exp(mid_pred))
    # Accuracy with a 0.5 threshold.
    mask = (y_pred > 0.5) * 1
    acc = (mask == Y).sum() / Y.shape[0]
    print(acc)
0.5
0.5
0.5
0.97
0.89
0.99
0.98
0.99
0.99
0.99
求解出的w1_value、w2_value、b_value的值
w1_value, w2_value, b_value
(0.273746460796506, -0.423085556545553, -0.121034304094094)
画出分类线
# Decision line: x2 = (-b - w1*x1) / w2, drawn over the observed x1 range.
x = np.arange(X[:, 0].min(), X[:, 0].max(), 0.1)
y = -(b_value + w1_value * x) / w2_value
plt.plot(X[:, 0][Y == 0], X[:, 1][Y == 0], "ro", label = "0")
plt.plot(X[:, 0][Y == 1], X[:, 1][Y == 1], "bo", label = "1")
plt.plot(x, y, "g", label = "classifier curve")
plt.legend()
plt.show()
总结
- pytorch的自动微分功能大大简化了求偏导的操作
- 由于两类分得很开,且梯度下降带有一定的随机性,使得pytorch版和numpy版求得的回归方程系数差异较大,但从散点图和分类线可以看出,两套系数都可以起到不错的预测效果