1、加载数据
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score
# Load the Boston housing dataset and pull out the pieces used below.
# NOTE(review): load_boston was removed in scikit-learn 1.2 - confirm that the
# environment pins an older release, or switch to another dataset.
boston = load_boston()
names = boston["feature_names"]
Y = boston["target"]

# Standardise the feature matrix (StandardScaler defaults: centre each column
# and scale it to unit variance).
scaler = StandardScaler()
scaler.fit(boston["data"])
X = scaler.transform(boston["data"])
2、选择最优的正则化参数
from sklearn.linear_model import LassoCV
# Candidate regularisation strengths; LassoCV cross-validates over this grid
# and exposes the selected value as `alpha_` after fitting.
candidate_alphas = [0.1, 1, 0.001, 0.0005]
model_lasso = LassoCV(alphas=candidate_alphas)
model_lasso.fit(X, Y)
# Bare expression: the chosen alpha is only echoed in an interactive
# session / notebook; it has no effect when run as a script.
model_lasso.alpha_
3、输出模型最终选择了几个特征变量，剔除了几个特征变量
import pandas as pd
# Pair every fitted coefficient with its feature name.
coef = pd.Series(model_lasso.coef_, index=names)

# Features whose coefficient is exactly zero are counted as eliminated,
# the rest as picked.
kept = (coef != 0).sum()
dropped = (coef == 0).sum()
print(f"Lasso picked {kept} variables and eliminated the other {dropped} variables")
4、画出特征变量的重要程度
import matplotlib
# pyplot must be imported explicitly: `import matplotlib` alone does not
# define `plt`, so the title/show calls below would raise NameError.
import matplotlib.pyplot as plt

# The three smallest and three largest coefficients (sorted once, reused).
# NOTE(review): imp_coef is built but the chart below draws the full `coef`
# series - confirm which of the two was meant to be plotted.
sorted_coef = coef.sort_values()
imp_coef = pd.concat([sorted_coef.head(3), sorted_coef.tail(3)])

# Tall figure so every horizontal bar label stays readable.
matplotlib.rcParams['figure.figsize'] = (8.0, 10.0)
coef.plot(kind="barh")
plt.title("Coefficients in the Lasso Model")
plt.show()