python-11

最新推荐文章于 2024-07-12 16:16:27 发布

qq_48904250

最新推荐文章于 2024-07-12 16:16:27 发布

阅读量975

点赞数

分类专栏：python基础　文章标签：python 机器学习 sklearn

本文链接：https://blog.csdn.net/qq_48904250/article/details/124494511

版权

python基础专栏收录该内容

11 篇文章 0 订阅

订阅专栏

逻辑斯蒂回归

逻辑斯蒂回归通常用来解决分类问题，是将样本的特征和样本发生的概率联系起来

$$p = \sigma(t) = \frac{1}{1 + e^{-t}}$$

值域是(0,1)；t>0时，p>0.5; t<0时,p<0.5

$$h_\theta(x) = \sigma(\theta^{T} x) = \frac{1}{1 + e^{-\theta^{T} x}}$$

该假设函数预测的是分类y=1的发生概率的大小

决策边界
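决策边界是用来划分样本类别的边界，分为线性决策边界（例如 $h_\theta(x) = g(-3 + x_1 + x_2)$，当 $-3 + x_1 + x_2 \ge 0$ 时预测 $y = 1$）和非线性决策边界（例如 $h_\theta(x) = g(-1 + x_1^2 + x_2^2)$，当 $-1 + x_1^2 + x_2^2 \ge 0$ 时预测 $y = 1$），其中 $g$ 为 sigmoid 函数。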
逻辑回归的损失函数

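$$J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log\left(\sigma(X_b^{(i)} \theta)\right) + (1 - y^{(i)}) \log\left(1 - \sigma(X_b^{(i)} \theta)\right) \right]$$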

逻辑回归损失函数的梯度

$$\nabla J(\theta) = \frac{1}{m} X_b^{T} \left( \sigma(X_b \theta) - y \right)$$

scikit-learn实现逻辑回归

scikit-learn逻辑回归多分类问题的解决（ovr策略和ovo策略）
独立的OvO and OvR
OVR训练拟合过程
OVO与OVR预测过程

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

# Load the iris data set: feature matrix X and class labels y.
iris = datasets.load_iris()
X = iris.data
y = iris.target

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier

# A fixed random_state keeps the train/test split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

# OvR (one-versus-rest): one binary classifier per class.
# LogisticRegression's own `multi_class='ovr'` argument is deprecated in
# recent scikit-learn releases, so the strategy is applied through the
# OneVsRestClassifier wrapper, which works on old and new versions alike.
log_reg = OneVsRestClassifier(LogisticRegression())
log_reg.fit(X_train, y_train)
y_predict = log_reg.predict(X_test)
log_reg.score(X_test, y_test)

# Multinomial (softmax) logistic regression: a single model over all classes.
# NOTE: this is not a one-versus-one ensemble; for true OvO wrap the
# estimator in sklearn.multiclass.OneVsOneClassifier.
# With scikit-learn >= 0.22 the "newton-cg" solver fits the multinomial loss
# by default on a multiclass target, so `multi_class` is not passed.
log_reg = LogisticRegression(solver="newton-cg")
log_reg.fit(X_train, y_train)
log_reg.score(X_test, y_test)

在逻辑回归中使用多项式特征

import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global RNG so the synthetic data set is reproducible.
np.random.seed(666)
X = np.random.normal(0, 1, size=(200, 2))

# Class 1 where x0**2 + x1 < 1.5 (a parabolic boundary), class 0 elsewhere.
y = np.array((X[:, 0]**2 + X[:, 1]) < 1.5, dtype='int')
# Inject label noise: 20 random draws (indices may repeat) are forced to 1.
for _ in range(20):
    y[np.random.randint(200)] = 1

# One scatter call per class so each class gets its own colour.
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.show()

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

# Helper that draws the decision regions of a fitted classifier.

def plot_decision_boundary(model, axis):
    """Fill the current matplotlib axes with the regions predicted by *model*.

    A regular grid is laid over the rectangle described by *axis*, every
    grid point is classified with ``model.predict`` and the predicted labels
    are drawn as filled contours.

    Args:
        model: Fitted estimator whose ``predict`` accepts an ``(n, 2)`` array
            and returns one label per row.
        axis: Sequence ``[x_min, x_max, y_min, y_max]`` giving the plotted
            rectangle; the grid uses about 100 samples per unit length.

    Returns:
        None. The contours are drawn on the current pyplot figure; the
        caller is responsible for calling ``plt.show()``.
    """
    from matplotlib.colors import ListedColormap

    # meshgrid accepts 1-D coordinate vectors directly, no reshape needed.
    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
        np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100)),
    )
    # One row per grid point: column 0 is the x coordinate, column 1 the y.
    X_new = np.c_[x0.ravel(), x1.ravel()]

    y_predict = model.predict(X_new)
    # Back to the grid's 2-D shape so contourf can map labels to positions.
    zz = y_predict.reshape(x0.shape)

    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])

    # Line-width options only apply to unfilled contours, so none is passed.
    plt.contourf(x0, x1, zz, cmap=custom_cmap)

# Plain (linear) logistic regression from scikit-learn

from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
# Accuracy on the training and test splits, then the raw test predictions.
log_reg.score(X_train, y_train)
log_reg.score(X_test, y_test)
log_reg.predict(X_test)

# Draw the decision regions first, then overlay the samples of each class.
plot_decision_boundary(log_reg, axis=[-4, 4, -4, 4])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.show()

# Logistic regression on polynomial features

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler


def PolynomialLogisticRegression(degree, C, penalty='l2', solver=None):
    """Build a pipeline: polynomial features -> scaling -> logistic regression.

    Args:
        degree: Degree passed to ``PolynomialFeatures``.
        C: Value passed as ``C`` to ``LogisticRegression``.
        penalty: Regularisation type passed to ``LogisticRegression``.
        solver: Optional solver name. When omitted, ``'liblinear'`` is chosen
            for ``penalty='l1'`` (scikit-learn's current default solver does
            not support L1); otherwise scikit-learn's own default is used.

    Returns:
        An unfitted ``Pipeline`` with steps ``'poly'``, ``'std_scaler'`` and
        ``'log_reg'``.
    """
    log_reg_params = {'C': C, 'penalty': penalty}
    if solver is None and penalty == 'l1':
        solver = 'liblinear'
    # Only pass a solver when one was selected, so the library default stays
    # in effect for every other penalty.
    if solver is not None:
        log_reg_params['solver'] = solver

    return Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('log_reg', LogisticRegression(**log_reg_params)),
    ])

# Degree-20 polynomial features with L1 regularisation (C=0.1).
poly_log_reg = PolynomialLogisticRegression(degree=20, C=0.1, penalty='l1')
poly_log_reg.fit(X_train, y_train)

# Accuracy on the training and test splits.
poly_log_reg.score(X_train, y_train)
poly_log_reg.score(X_test, y_test)

# Draw the decision regions first, then overlay the samples of each class.
plot_decision_boundary(poly_log_reg, axis=[-4, 4, -4, 4])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.show()