【原理】套索回归(Lasso)使用L1正则化,将系数限制在非常接近0的范围内,并使其中一部分特征的系数恰好等于0。这些特征会被模型完全忽略,从而突出体现模型中最重要的那些特征。
#导入数据集拆分工具
from sklearn.model_selection import train_test_split
#使用含噪声的数据
from sklearn.datasets import load_diabetes#糖尿病数据集
# Fetch features and targets with a single call: return_X_y=True makes
# load_diabetes return the (data, target) pair directly, so the dataset is
# not loaded twice just to read two attributes.
X, y = load_diabetes(return_X_y=True)
# Hold out a test split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=8)
import numpy as np
#导入套索回归
from sklearn.linear_model import Lasso
# Baseline run: Lasso with its default settings (the original note labels
# this configuration as underfitting).
lasso = Lasso()
lasso.fit(X_train, y_train)
# Report the model's score on the training split, then on the test split.
lasso_train_score = lasso.score(X_train, y_train)
print("套索回归在训练数据集的得分:{:.2f}".format(lasso_train_score))
lasso_test_score = lasso.score(X_test, y_test)
print("套索回归在测试数据集的得分:{:.2f}".format(lasso_test_score))
# Count the features whose fitted coefficient is non-zero.
lasso_used_features = np.sum(lasso.coef_ != 0)
print("套索回归使用的特征数:{}".format(lasso_used_features))
# Parameter tuning: refit with alpha=0.1 and max_iter raised to 100000.
lasso01 = Lasso(alpha=0.1, max_iter=100000)
lasso01.fit(X_train, y_train)
# Report the tuned model's score on the training split, then on the test split.
lasso01_train_score = lasso01.score(X_train, y_train)
print("套索回归在训练数据集的得分:{:.2f}".format(lasso01_train_score))
lasso01_test_score = lasso01.score(X_test, y_test)
print("套索回归在测试数据集的得分:{:.2f}".format(lasso01_test_score))
print(&#