# Code source: https://www.pianshen.com/article/1270372172/
from sklearn.datasets import make_blobs
import numpy as np, matplotlib.pyplot as mp
from sklearn.linear_model import LogisticRegression
# Demonstrates how class imbalance shifts the decision boundary of a
# logistic-regression classifier. Three data sets are fitted and plotted side
# by side: balanced, negative-heavy (5:1) and positive-heavy (1:5).

# Balanced data set: negative:positive = 1:1
X, Y = make_blobs(centers=2, cluster_std=2, random_state=0)

# Negative-heavy data set: negative:positive = 5:1.
# The original samples are followed by four extra copies of the class-0 rows;
# the number of padding labels is derived from the arrays instead of being
# hard-coded, so it stays correct if the sample count changes.
X0 = np.concatenate([X] + [X[Y == 0]] * 4)
Y0 = np.concatenate((Y, np.zeros(len(X0) - len(X), np.int32)))

# Positive-heavy data set: negative:positive = 1:5 (same construction with
# the class-1 rows).
X1 = np.concatenate([X] + [X[Y == 1]] * 4)
Y1 = np.concatenate((Y, np.ones(len(X1) - len(X), np.int32)))

for i, (Xi, Yi) in enumerate([(X, Y), (X0, Y0), (X1, Y1)]):
    # Fit the model
    model = LogisticRegression()
    model.fit(Xi, Yi)

    # Learned parameters
    k = model.coef_[0]  # coefficients, one per feature
    b = model.intercept_[0]  # bias

    # Visualisation: one panel per data set
    mp.subplot(1, 3, i + 1)
    x1, x2 = Xi[:, 0], Xi[:, 1]
    mp.scatter(x1, x2, c=Yi, alpha=.2)  # raw sample points, coloured by label

    # Decision boundary: k[0] * x + k[1] * y + b = 0, solved for y and drawn
    # across the horizontal extent of the data.
    x = np.array([x1.min(), x1.max()])
    y = (-b - k[0] * x) / k[1]
    mp.plot(x, y)

# Show the figure once, after all three panels have been drawn.
mp.show()
# Background / theory:
# https://blog.csdn.net/weixin_42348394/article/details/112266454