逻辑斯蒂回归虽然有"回归"两个字,实际上它是一个分类模型,它可以输出属于每一个类别的概率
1 二分类
import numpy as np
from sklearn import datasets

# Load the iris dataset bundled with sklearn.
iris = datasets.load_iris()
# Use a single feature: petal width.
X = iris["data"][:, 3:]  # petal width
# Binarize the target: samples with label 2 become 1, all others 0.
# FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24 — use builtin int.
y = (iris["target"] == 2).astype(int)

from sklearn.linear_model import LogisticRegression

# Fit a binary logistic regression classifier.
log_reg = LogisticRegression(solver="lbfgs", random_state=42)
log_reg.fit(X, y)

# Score a grid of 10 new inputs spanning [0, 3].
X_new = np.linspace(0, 3, 10).reshape(-1, 1)
# predict_proba returns k values per input row — one probability per class.
y_proba = log_reg.predict_proba(X_new)
print(y_proba)
>>>[[9.99250016e-01 7.49984089e-04]
[9.96828414e-01 3.17158633e-03]
[9.86691904e-01 1.33080964e-02]
[9.45916426e-01 5.40835739e-02]
[8.04908853e-01 1.95091147e-01]
[4.93225575e-01 5.06774425e-01]
[1.86720667e-01 8.13279333e-01]
[5.13769237e-02 9.48623076e-01]
[1.26148692e-02 9.87385131e-01]
[3.00476842e-03 9.96995232e-01]]
# Predict hard class labels (0 or 1) for the grid.
# FIX: print the result so the output shown below is actually produced,
# and use a distinct name so the training labels `y` are not clobbered.
y_pred = log_reg.predict(X_new)
print(y_pred)
>>>[0 0 0 0 0 1 1 1 1 1]
2 多分类
import numpy as np
# FIX: `datasets` is used below but was not imported in this snippet.
from sklearn import datasets
from sklearn.linear_model import LogisticRegression

# Load the iris dataset bundled with sklearn.
iris = datasets.load_iris()
# Use two features: petal length and petal width.
X = iris["data"][:, (2, 3)]
y = iris["target"]

# multi_class="multinomial" selects softmax regression; C=10 weakens regularization.
softmax_reg = LogisticRegression(multi_class="multinomial", solver="lbfgs",
                                 C=10, random_state=42)
softmax_reg.fit(X, y)

# Build a small 3x3 grid of (petal length, petal width) points to score.
x0, x1 = np.meshgrid(
    np.linspace(0, 8, 3).reshape(-1, 1),
    np.linspace(0, 3.5, 3).reshape(-1, 1))
X_new = np.c_[x0.ravel(), x1.ravel()]

# One probability per class (3 classes) for each grid point.
y_proba = softmax_reg.predict_proba(X_new)
print(y_proba)
>>>[[9.99996239e-01 3.76140758e-06 6.80030712e-20]
[1.50628731e-03 9.98493250e-01 4.62599964e-07]
[6.64988192e-13 7.76848864e-02 9.22315114e-01]
[9.99995653e-01 4.34707831e-06 7.58462461e-15]
[1.24789116e-03 9.56007603e-01 4.27445060e-02]
[6.46440282e-18 8.72767038e-07 9.99999127e-01]
[9.99994975e-01 5.02394057e-06 8.45940105e-10]
[2.61691633e-07 2.31697741e-04 9.99768041e-01]
[5.79592787e-23 9.04357657e-12 1.00000000e+00]]
# Hard class predictions: the argmax over the per-class probabilities.
y_predict = softmax_reg.predict(X_new)
print(y_predict)
>>>[0 1 2 0 1 2 0 2 2]
参考:Hands-on Machine Learning with Scikit-Learn, Keras, and TensorFlow, 2nd Edition