Logistic回归(python实战)
Logistic 回归(理论篇)介绍了Logistic回归的原理及其公式推导,本篇主要通过Python实现Logistic回归。
Logistic回归带L2正则的目标函数
$$J(\theta) = -\sum_{i=1}^{m}\left[y^{(i)}\log h_\theta(x^{(i)}) + \left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right] + \frac{1}{2}\lambda\lVert\theta\rVert^{2}$$
其中 $h_\theta(x) = \dfrac{1}{1+e^{-\theta^{T}x}}$ 为sigmoid函数,$m$ 为样本数,$\lambda$ 为正则化系数。
随机梯度下降求解参数
参数更新公式:
$$\theta_j := \theta_j - \alpha\left(\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)} + \lambda\theta_j\right)$$
其中 $\alpha$ 为学习率,每次只用一个样本 $(x^{(i)}, y^{(i)})$ 更新参数。
python3实战
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
sigmoid函数
def sigmoid(x):
    """Return ``1 / (1 + exp(x))``, evaluated element-wise for array input.

    NOTE: despite the name, this is the logistic function evaluated at
    ``-x`` (equivalently ``1 - 1 / (1 + exp(-x))``), not the conventional
    ``1 / (1 + exp(-x))``. A caller that wants the usual sigmoid of a score
    ``z`` has to pass ``-z``. The sign convention is kept as-is so that
    existing callers, which already negate their argument, keep working.

    Args:
        x: Scalar or array-like accepted by ``np.exp``.

    Returns:
        A float, or an array with the shape of ``x``, with values in [0, 1].
    """
    return 1 / (1 + np.exp(x))
带L2正则的Logistic Regression
def logsticRegression(data, label, max_iter=1000, alpha=0.01, eps=1e-5, lam=0.0):
    """Fit binary logistic regression by per-sample (stochastic) gradient descent.

    For every sample the parameters are updated with
    ``theta -= alpha * ((h - y) * x + lam * theta)`` where
    ``h = 1 / (1 + exp(-theta . x))``.

    Args:
        data: 2-D array of shape (n_samples, n_features). No intercept column
            is added here; include a constant column in ``data`` if a bias
            term is wanted.
        label: Targets indexed in step with the rows of ``data``
            (expected to be 0 or 1).
        max_iter: Maximum number of passes over ``data``.
        alpha: Learning rate.
        eps: Early-stopping tolerance on the largest absolute component of a
            single update step.
        lam: L2 regularisation strength applied to every component of
            ``theta``. The default ``0.0`` disables the penalty, which
            reproduces the unregularised update.

    Returns:
        1-D array holding the ``n_features`` fitted weights.
    """
    n_features = data.shape[1]
    theta = np.zeros(n_features)
    for _ in range(max_iter):
        for i, x in enumerate(data):
            # Logistic function of the linear score for this sample.
            prob = 1 / (1 + np.exp(-np.dot(theta, x)))
            grad = (prob - label[i]) * x
            if lam:
                grad = grad + lam * theta
            step = alpha * grad
            theta = theta - step
            # NOTE(review): the stop test looks at one sample's step only, so
            # training ends as soon as ANY single sample yields a tiny update;
            # it is not an epoch-level convergence check.
            if np.max(np.abs(step)) < eps:
                return theta
    return theta
从seaborn中直接加载鸢尾花数据集
# Load the "iris" dataset through seaborn's dataset loader.
iris = sns.load_dataset("iris")
选择setosa和virginica两个类别做Logistic Regression
# Keep only the setosa and virginica rows. The row mask is built once and
# reused, so the feature frame and the label series always cover the same rows.
two_class_mask = iris["species"].isin(["setosa", "virginica"])
# Select rows and columns in a single .loc call instead of chained indexing.
data = iris.loc[two_class_mask, ["sepal_length", "sepal_width"]]
# Explicit copy: the labels are modified later and must not alias `iris`.
label = iris.loc[two_class_mask, "species"].copy()
将类别标记为0和1
# Encode each species name as an integer class index (0, 1, ...) in order of
# first appearance. `map` builds a new integer-valued Series instead of
# writing ints into the string-valued Series through a boolean mask.
iris_types = label.unique()
label = label.map({iris_type: i for i, iris_type in enumerate(iris_types)})
样本的显示
# Scatter the two sepal features, colouring each point by its value in `label`.
plt.scatter(data["sepal_length"], data["sepal_width"],c=label)
增广矩阵并估计参数
# Reindexing against a column list that starts with the new name "x0" creates
# that column and fills it with 1, giving every sample a constant term.
data = data.reindex(
    columns=["x0", "sepal_length", "sepal_width"],
    fill_value=1,
)
# Fit on the raw arrays: at most 10000 passes with a learning rate of 0.005.
theta = logsticRegression(data.values, label.values, max_iter=10000, alpha=0.005)
# Bare expression: shows the fitted parameters when run in a notebook cell.
theta
array([-1.02559933, 2.76979605, -4.62939623])
画图显示结果
# Number of grid samples along the horizontal and vertical axes.
N, M = 500, 500
# Value range of column 1 of `data` (sepal_length; column 0 is the constant x0).
x1_min = data.values[:, 1].min()
x1_max = data.values[:, 1].max()
# Value range of column 2 of `data` (sepal_width).
x2_min = data.values[:, 2].min()
x2_max = data.values[:, 2].max()
t1 = np.linspace(x1_min, x1_max, N)
t2 = np.linspace(x2_min, x2_max, M)
# Build the 2-D sampling grid from the two 1-D axes.
x1, x2 = np.meshgrid(t1, t2)
# One row per grid point: [1, x1, x2], matching the augmented training layout.
x_test = np.stack((np.ones(x1.size), x1.ravel(), x2.ravel()), axis=1)
def predict(theta, x_test):
    """Classify each row of ``x_test`` as 0 or 1 with a logistic model.

    Args:
        theta: 1-D weight vector of length n_features.
        x_test: 2-D array-like of shape (n_samples, n_features), laid out the
            same way as the data ``theta`` was fitted on.

    Returns:
        1-D integer array of length n_samples; an entry is 1 where the
        predicted probability is strictly greater than 0.5, otherwise 0.
    """
    # One matrix-vector product scores all rows at once instead of looping
    # over the samples in Python.
    scores = np.asarray(x_test, dtype=float) @ np.asarray(theta, dtype=float)
    probabilities = 1 / (1 + np.exp(-scores))
    return (probabilities > 0.5).astype(int)
# Classify every grid point, then fold the flat predictions back into the
# shape of the sampling grid so they can be drawn as a mesh.
y_hat = predict(theta, x_test)
y_hat = y_hat.reshape(x1.shape)
cm_light = mpl.colors.ListedColormap(['#77E0A0', '#FF8080', '#A0A0FF'])
cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])
plt.pcolormesh(x1, x2, y_hat, cmap=cm_light)  # predicted class regions
plt.scatter(data["sepal_length"], data["sepal_width"], c=label, edgecolors='k', s=50, cmap=cm_dark)  # training samples
# The plotted columns are the sepal measurements, so the axes are labelled
# as sepal (they were previously mislabelled as petal).
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.xlim(x1_min, x1_max)
plt.ylim(x2_min, x2_max)
plt.grid()