本笔记来源于B站Up主: 有Li 的影像组学系列教学视频
本节(7)主要介绍: 特征筛选之LASSO回归分析(代码实现)
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoCV
# --- Load the two feature tables (one Excel sheet per class) ---------------
xlsx1_filePath = 'C:/Users/RONG/Desktop/PythonBasic/data_A.xlsx'
xlsx2_filePath = 'C:/Users/RONG/Desktop/PythonBasic/data_B.xlsx'
data_1 = pd.read_excel(xlsx1_filePath)
data_2 = pd.read_excel(xlsx2_filePath)

# --- Attach the class label as the first column: 0 for sheet A, 1 for B ----
rows_1 = data_1.shape[0]
rows_2 = data_2.shape[0]
data_1.insert(0, 'label', [0] * rows_1)
data_2.insert(0, 'label', [1] * rows_2)

# --- Merge both classes, randomise the row order, replace missing values ---
# NOTE: shuffle() is called without a random_state, so the row order (and
# therefore the cross-validation folds used later) differs between runs.
data = shuffle(pd.concat([data_1, data_2]))
data = data.fillna(0)

# --- Split into features / target ------------------------------------------
# 'label' was inserted at position 0, so every remaining column is a feature.
colNames = data.columns[1:]
y = data['label']
X = data[colNames].astype(np.float64)

# --- Standardise the features ----------------------------------------------
# StandardScaler returns a plain ndarray, so wrap the result in a DataFrame
# again and restore the original feature names.
X = pd.DataFrame(StandardScaler().fit_transform(X), columns=colNames)
LASSO回归
# LASSO method
# Candidate regularisation strengths: 50 values log-spaced from 1e-3 to 1e1.
alphas = np.logspace(-3, 1, 50)
print(alphas)

# cv=10 -> 10-fold cross-validation is used to choose among the candidates.
model_lassoCV = LassoCV(
    alphas=alphas,
    cv=10,
    max_iter=100000,
).fit(X, y)

# The alpha selected by cross-validation.
print(model_lassoCV.alpha_)

# Pair every fitted coefficient with its feature name.
coef = pd.Series(data=model_lassoCV.coef_, index=X.columns)
# print(coef)

# Features with a non-zero coefficient are the ones LASSO kept.
n_picked = sum(coef != 0)
n_eliminated = sum(coef == 0)
print("Lasso picked " + str(n_picked) + " variables and eliminated the other " + str(n_eliminated))
Output:
# 0.020235896477251564 # Lasso picked 8 variables and eliminated the other 99
- 1
- 2
# Keep only the features whose LASSO coefficient is non-zero.
selected_coef = coef[coef != 0]
index = selected_coef.index
X = X[index]
# print(X.head())

# Show the surviving features together with their coefficients.
print(selected_coef)
Output:
# original_shape_Flatness 0.251719
# original_glcm_Correlation -0.005528
# original_glcm_Idmn -0.143942
# original_gldm_DependenceEntropy 0.054091
# original_gldm_SmallDependenceLowGrayLevelEmphasis 0.090112
# original_glszm_SmallAreaLowGrayLevelEmphasis 0.185858
# original_ngtdm_Coarseness -0.156813
# original_ngtdm_Strength -0.004631
# dtype: float64
# Intercept of the fitted LASSO model.
# Value printed in the author's run: 0.49999999999999994
lasso_intercept = model_lassoCV.intercept_
print(lasso_intercept)
作者:北欧森林
链接:https://www.jianshu.com/p/cd7f20db585d
来源:简书(已获授权转载)。RadiomicsWorld.com “影像组学世界”论坛:
影像组学世界/RadiomicsWorld