# Logistic regression
# Logistic regression is a classification model
# L1 Penalty and Sparsity in Logistic Regression
#比较不同值C在L1和L2惩罚下解的稀疏性(零系数百分比)。可以看出,C值越大,模型的自由度越大。
#相反,C的值越小,对模型的约束越大。在L1惩罚的情况下,这会导致稀疏解。
#我们将8x8的数字图像分为两类:0-4和5-9。可视化显示了C变化模型的系数
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
# Load the 8x8 digit images and standardize each feature to zero mean, unit variance.
digits = datasets.load_digits()
x, y = digits.data, digits.target
x = StandardScaler().fit_transform(x)  # fit to data, then transform it
# x is now a 1797x64 matrix (1797 samples, 64 pixel features), standardized
# sample = StandardScaler().fit(x)  # alternative: fit only, exposing mean/variance
# sample.mean_  # inspect the per-feature means
# Binarize the target: digits 0-4 -> 0 (False), digits 5-9 -> 1 (True).
# Use the builtin `int` here: the `np.int` alias was deprecated in NumPy 1.20
# and removed in NumPy 1.24, so it raises AttributeError on current NumPy.
y = (y > 4).astype(int)
# A quick look at the enumerate function:
# Demo: enumerate yields (index, element) pairs over a sequence.
seq = ['one', 'two', 'three']
for idx, word in enumerate(seq):
    print(idx, word)
# The output is:
# 0 one
# 1 two
# 2 three
# i.e., enumerate pairs an index with each element.
# Compare the sparsity (percentage of zero coefficients) of L1- vs L2-penalized
# logistic regression for three inverse-regularization strengths C.
# Smaller C = stronger regularization; under L1 this drives coefficients to exactly 0.
for i, c in enumerate((1, 0.1, 0.01)):  # c takes the three values 1, 0.1, 0.01
    clf_l1_LR = LogisticRegression(C=c, penalty='l1', tol=0.01, solver='saga')  # L1 penalty
    clf_l2_LR = LogisticRegression(C=c, penalty='l2', tol=0.01, solver='saga')  # L2 penalty
    clf_l1_LR.fit(x, y)
    clf_l2_LR.fit(x, y)
    # Flatten the (1, 64) coefficient matrices to 1-D vectors.
    coef_l1_LR = clf_l1_LR.coef_.ravel()
    coef_l2_LR = clf_l2_LR.coef_.ravel()
    # Sparsity = percentage of coefficients that are exactly zero.
    sparsity_l1_LR = np.mean(coef_l1_LR == 0) * 100
    sparsity_l2_LR = np.mean(coef_l2_LR == 0) * 100
    print('c=%.2f' % c)
    print('sparsity with l1 penalty:%.2f' % sparsity_l1_LR)  # sparsity rate
    print('score with l1 penalty:%.2f' % clf_l1_LR.score(x, y))  # mean accuracy on training data
    print('sparsity with l2 penalty:%.2f' % sparsity_l2_LR)
    print('score with l2 penalty:%.2f' % clf_l2_LR.score(x, y))
    # 3x2 grid of subplots: L1 images fill the left column (positions 1, 3, 5),
    # L2 images the right column (positions 2, 4, 6).
    l1_plot = plt.subplot(3, 2, 2 * i + 1)
    l2_plot = plt.subplot(3, 2, 2 * (i + 1))
    # Show |coefficient| reshaped back to the 8x8 image grid; darker = larger weight.
    l1_plot.imshow(np.abs(coef_l1_LR.reshape(8, 8)), interpolation='nearest',
                   cmap='binary', vmax=1, vmin=0)
    l2_plot.imshow(np.abs(coef_l2_LR.reshape(8, 8)), interpolation='nearest',
                   cmap='binary', vmax=1, vmin=0)
    plt.text(-8, 3, "C = %.2f" % c)  # label the row with its C value
    # Hide axis ticks on both subplots.
    l1_plot.set_xticks(())
    l1_plot.set_yticks(())
    l2_plot.set_xticks(())
    l2_plot.set_yticks(())  # fix: original line was missing the closing parenthesis