# 逻辑回归初学 (Getting started with logistic regression)
# 老规矩，导入库 (As usual, import the libraries first)
from sklearn.linear_model import LogisticRegression as LR
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
# max_iter 学习曲线 (learning curve over max_iter)
# Learning curve over max_iter: train/test accuracy of an L2-penalised
# liblinear logistic regression as the iteration cap grows from 1 to 191.
l2 = []      # training-set accuracy, one entry per max_iter value
l2test = []  # test-set accuracy, one entry per max_iter value

cancer = load_breast_cancer()
x = cancer.data
y = cancer.target
# No random_state is passed, so the split (and every score below) differs
# from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# Shared by the training loop, the x-axis of the plot and the tick marks.
max_iters = np.arange(1, 201, 10)
for i in max_iters:
    # The smallest caps may stop the solver before it converges, in which
    # case scikit-learn emits a ConvergenceWarning; that is expected here.
    L2 = LR(penalty='l2', solver='liblinear', C=0.8, max_iter=i)
    L2 = L2.fit(x_train, y_train)
    # accuracy_score is documented as (y_true, y_pred).
    l2.append(accuracy_score(y_train, L2.predict(x_train)))
    l2test.append(accuracy_score(y_test, L2.predict(x_test)))

graph = [l2, l2test]
color = ['black', 'gray']
label = ['l2', 'l2test']
plt.figure(figsize=(8, 6))
for scores, line_color, line_label in zip(graph, color, label):
    # Pass the colour by keyword rather than as the positional fmt string.
    plt.plot(max_iters, scores, color=line_color, label=line_label)
plt.legend()
plt.xticks(max_iters)
plt.show()
# 使用 n_iter_ 来看本次求解中真正实现的迭代次数 (use n_iter_ to see how many iterations the solver actually ran)
# Refit with a generous iteration cap (300) and report how many iterations
# the solver actually used.
L2 = LR(penalty='l2', solver='liblinear', C=0.8, max_iter=300).fit(x_train, y_train)
# A bare expression only displays in a notebook; print it so a script shows it too.
print(repr(L2.n_iter_))
# Recorded notebook output: array([21], dtype=int32)
# 降维 (dimensionality reduction via embedded feature selection)
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import SelectFromModel
# Embedded feature selection: compare the mean 10-fold CV accuracy on all
# features with the accuracy on the subset kept by SelectFromModel.
print(cancer.data.shape)
# Recorded notebook output: (569, 30)

Cancer_LR = LR(solver='liblinear', C=0.8)

# Cross-validation baseline on the full feature matrix.
print(cross_val_score(Cancer_LR, cancer.data, cancer.target, cv=10).mean())
# Recorded notebook output: 0.9508998790078644

# NOTE(review): the selector is fitted on the whole dataset before the
# cross-validation below, so every fold's labels have already influenced
# which columns are kept. Wrap selector + model in a Pipeline if an
# unbiased estimate is required.
x_embedded = SelectFromModel(Cancer_LR).fit_transform(cancer.data, cancer.target)
print(x_embedded.shape)
# Recorded notebook output: (569, 9)

# Same model, same CV scheme, reduced feature matrix.
print(cross_val_score(Cancer_LR, x_embedded, cancer.target, cv=10).mean())
# Recorded notebook output: 0.9368323826808401
# 调整C的学习曲线来降维 (learning curve over C for the feature-selected model)
# Coarse sweep over the inverse regularisation strength C (0.01 upward in
# steps of 0.5, stopping before 10.01): mean 10-fold CV accuracy with all
# features versus the SelectFromModel subset.
fullx = []  # scores on the full feature matrix, aligned with c
delx = []   # scores on the selected features, aligned with c
c = np.arange(0.01, 10.01, 0.5)
for i in c:
    Cancer_LR = LR(solver='liblinear', C=i)
    fullx.append(cross_val_score(Cancer_LR, cancer.data, cancer.target, cv=10).mean())
    # The selector is refitted for every C, so the kept columns can change.
    x_embedded = SelectFromModel(Cancer_LR).fit_transform(cancer.data, cancer.target)
    delx.append(cross_val_score(Cancer_LR, x_embedded, cancer.target, cv=10).mean())

# Best feature-selected score and the (first) C value that achieved it.
best_score = max(delx)
print(best_score, c[delx.index(best_score)])

plt.figure(figsize=(20, 8))
plt.plot(c, fullx, label="full")
plt.plot(c, delx, label="feature selection")
plt.xticks(c)
plt.legend()
plt.show()
# Fine sweep over C on the narrower range starting at 6.05 (step 0.005,
# stopping before 7.05), using the same full-vs-selected comparison.
# Each candidate costs two 10-fold cross-validations plus one selector fit,
# so this loop is slow.
fullx = []  # scores on the full feature matrix, aligned with c
delx = []   # scores on the selected features, aligned with c
c = np.arange(6.05, 7.05, 0.005)
for i in c:
    Cancer_LR = LR(solver='liblinear', C=i)
    fullx.append(cross_val_score(Cancer_LR, cancer.data, cancer.target, cv=10).mean())
    # The selector is refitted for every C, so the kept columns can change.
    x_embedded = SelectFromModel(Cancer_LR).fit_transform(cancer.data, cancer.target)
    delx.append(cross_val_score(Cancer_LR, x_embedded, cancer.target, cv=10).mean())

# Best feature-selected score and the (first) C value that achieved it.
best_score = max(delx)
print(best_score, c[delx.index(best_score)])

plt.figure(figsize=(20, 8))
plt.plot(c, fullx, label="full")
plt.plot(c, delx, label="feature selection")
plt.xticks(c)
plt.legend()
plt.show()