kaggle住房预测项目——第2部分(bagging)
基线模型
import xgboost as xgb
import copy
import datetime,time
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error
4
from sklearn.metrics import make_scorer
def xgb_eval(data):
def my_error_func(y_ture, y_pred):
error = np.sqrt(mean_squared_log_error(y_ture, y_pred))
return error
my_score = make_scorer(my_error_func, greater_is_better=False)
start = datetime.datetime.now()
train_df = copy.deepcopy(data)
X_train=train_df.drop(['SalePrice'],axis=1)
y_train=train_df.loc[:,'SalePrice'].values
model = xgb.XGBRegressor(
n_jobs=-1,
random_state=666
)
param_grid = {
'learning_rate': [0.1],
'n_estimators': [100]
}
gridsearch = GridSearchCV(
model,
param_grid=param_grid,
scoring=my_score,
cv=5,
verbose=3,
n_jobs=-1
)
gridsearch.fit(X_train, y_train)
print('参数的最佳取值:{0}'.format(gridsearch.best_params_))
print('最佳模型得分:{0}'.format(-gridsearch.best_score_))
end = datetime.datetime.now()
print('run time is:',(end-start).seconds,'秒')
return -gridsearch.best_score_, gridsearch.cv_results_
数据预处理
data = pd.concat([data_train, data_test], axis=0)
data.shape
(2919, 81)
缺失值处理
missing_data(data)
Total | Percent | |
---|---|---|
PoolQC | 2909 | 99.657417 |
MiscFeature | 2814 | 96.402878 |
Alley | 2721 | 93.216855 |
Fence | 2348 | 80.438506 |
SalePrice | 1459 | 49.982871 |
FireplaceQu | 1420 | 48.646797 |
LotFrontage | 486 | 16.649538 |
GarageQual | 159 | 5.447071 |
GarageYrBlt | 159 | 5.447071 |
GarageFinish | 159 | 5.447071 |
GarageCond | 159 | 5.447071 |
GarageType | 157 | 5.378554 |
BsmtExposure | 82 | 2.809181 |
BsmtCond | 82 | 2.809181 |
BsmtQual | 81 | 2.774923 |
BsmtFinType2 | 80 | 2.740665 |
BsmtFinType1 | 79 | 2.706406 |
MasVnrType | 24 | 0.822199 |
MasVnrArea | 23 | 0.787941 |
MSZoning | 4 | 0.137033 |
Utilities | 2 | 0.068517 |
Functional | 2 | 0.068517 |
BsmtFullBath | 2 | 0.068517 |
BsmtHalfBath | 2 | 0.068517 |
GarageArea | 1 | 0.034258 |
BsmtFinSF2 | 1 | 0.034258 |
Exterior1st | 1 | 0.034258 |
TotalBsmtSF | 1 | 0.034258 |
GarageCars | 1 | 0.034258 |
BsmtUnfSF | 1 | 0.034258 |
Electrical | 1 | 0.034258 |
BsmtFinSF1 | 1 | 0.034258 |
KitchenQual | 1 | 0.034258 |
SaleType | 1 | 0.034258 |
Exterior2nd | 1 | 0.034258 |
Street | 0 | 0.000000 |
RoofMatl | 0 | 0.000000 |
MSSubClass | 0 | 0.000000 |
LotArea | 0 | 0.000000 |
OverallCond | 0 | 0.000000 |
RoofStyle | 0 | 0.000000 |
YearRemodAdd | 0 | 0.000000 |
YearBuilt | 0 | 0.000000 |
OverallQual | 0 | 0.000000 |
HouseStyle | 0 | 0.000000 |
BldgType | 0 | 0.000000 |
Condition2 | 0 | 0.000000 |
Condition1 | 0 | 0.000000 |
LandSlope | 0 | 0.000000 |
LotShape | 0 | 0.000000 |
LandContour | 0 | 0.000000 |
LotConfig | 0 | 0.000000 |
Neighborhood | 0 | 0.000000 |
HeatingQC | 0 | 0.000000 |
ExterQual | 0 | 0.000000 |
TotRmsAbvGrd | 0 | 0.000000 |
YrSold | 0 | 0.000000 |
MoSold | 0 | 0.000000 |
MiscVal | 0 | 0.000000 |
PoolArea | 0 | 0.000000 |
ScreenPorch | 0 | 0.000000 |
3SsnPorch | 0 | 0.000000 |
EnclosedPorch | 0 | 0.000000 |
OpenPorchSF | 0 | 0.000000 |
WoodDeckSF | 0 | 0.000000 |
PavedDrive | 0 | 0.000000 |
Fireplaces | 0 | 0.000000 |
KitchenAbvGr | 0 | 0.000000 |
ExterCond | 0 | 0.000000 |
BedroomAbvGr | 0 | 0.000000 |
HalfBath | 0 | 0.000000 |
FullBath | 0 | 0.000000 |
GrLivArea | 0 | 0.000000 |
LowQualFinSF | 0 | 0.000000 |
2ndFlrSF | 0 | 0.000000 |
1stFlrSF | 0 | 0.000000 |
CentralAir | 0 | 0.000000 |
SaleCondition | 0 | 0.000000 |
Heating | 0 | 0.000000 |
Foundation | 0 | 0.000000 |
Id | 0 | 0.000000 |
直接删除处理
# 删除属性
def delete_feature(df):
N = df.shape[0] # 样本数
no_nan_count = df.count().to_frame().T # 每一维特征非缺失值的数量
del_feature, save_feature = [], []
for col in no_nan_count.columns.tolist():
loss_rate = (N - no_nan_count[col].values[0])/N # 缺失率
# print(loss_rate)
if loss_rate >= 0.8: # 缺失率大于 80% 时,将这一维特征删除
del_feature.append(col)
else:
save_feature.append(col)
return del_feature, df[save_feature]
del_feature, data = delete_feature(data)
print(del_feature)
data.head()
['Alley', 'PoolQC', 'Fence', 'MiscFeature']
Id | MSSubClass | MSZoning | LotFrontage | LotArea | Street | LotShape | LandContour | Utilities | LotConfig | LandSlope | Neighborhood | Condition1 | Condition2 | BldgType | HouseStyle | OverallQual | OverallCond | YearBuilt | YearRemodAdd | RoofStyle | RoofMatl | Exterior1st | Exterior2nd | MasVnrType | MasVnrArea | ExterQual | ExterCond | Foundation | BsmtQual | BsmtCond | BsmtExposure | BsmtFinType1 | BsmtFinSF1 | BsmtFinType2 | BsmtFinSF2 | BsmtUnfSF | TotalBsmtSF | Heating | HeatingQC | CentralAir | Electrical | 1stFlrSF | 2ndFlrSF | LowQualFinSF | GrLivArea | BsmtFullBath | BsmtHalfBath | FullBath | HalfBath | BedroomAbvGr | KitchenAbvGr | KitchenQual | TotRmsAbvGrd | Functional | Fireplaces | FireplaceQu | GarageType | GarageYrBlt | GarageFinish | GarageCars | GarageArea | GarageQual | GarageCond | PavedDrive | WoodDeckSF | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MiscVal | MoSold | YrSold | SaleType | SaleCondition | SalePrice | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 60 | RL | 65.0 | 8450 | Pave | Reg | Lvl | AllPub | Inside | Gtl | CollgCr | Norm | Norm | 1Fam | 2Story | 7 | 5 | 2003 | 2003 | Gable | CompShg | VinylSd | VinylSd | BrkFace | 196.0 | Gd | TA | PConc | Gd | TA | No | GLQ | 706.0 | Unf | 0.0 | 150.0 | 856.0 | GasA | Ex | Y | SBrkr | 856 | 854 | 0 | 1710 | 1.0 | 0.0 | 2 | 1 | 3 | 1 | Gd | 8 | Typ | 0 | NaN | Attchd | 2003.0 | RFn | 2.0 | 548.0 | TA | TA | Y | 0 | 61 | 0 | 0 | 0 | 0 | 0 | 2 | 2008 | WD | Normal | 208500.0 |
1 | 2 | 20 | RL | 80.0 | 9600 | Pave | Reg | Lvl | AllPub | FR2 | Gtl | Veenker | Feedr | Norm | 1Fam | 1Story | 6 | 8 | 1976 | 1976 | Gable | CompShg | MetalSd | MetalSd | None | 0.0 | TA | TA | CBlock | Gd | TA | Gd | ALQ | 978.0 | Unf | 0.0 | 284.0 | 1262.0 | GasA | Ex | Y | SBrkr | 1262 | 0 | 0 | 1262 | 0.0 | 1.0 | 2 | 0 | 3 | 1 | TA | 6 | Typ | 1 | TA | Attchd | 1976.0 | RFn | 2.0 | 460.0 | TA | TA | Y | 298 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2007 | WD | Normal | 181500.0 |
2 | 3 | 60 | RL | 68.0 | 11250 | Pave | IR1 | Lvl | AllPub | Inside | Gtl | CollgCr | Norm | Norm | 1Fam | 2Story | 7 | 5 | 2001 | 2002 | Gable | CompShg | VinylSd | VinylSd | BrkFace | 162.0 | Gd | TA | PConc | Gd | TA | Mn | GLQ | 486.0 | Unf | 0.0 | 434.0 | 920.0 | GasA | Ex | Y | SBrkr | 920 | 866 | 0 | 1786 | 1.0 | 0.0 | 2 | 1 | 3 | 1 | Gd | 6 | Typ | 1 | TA | Attchd | 2001.0 | RFn | 2.0 | 608.0 | TA | TA | Y | 0 | 42 | 0 | 0 | 0 | 0 | 0 | 9 | 2008 | WD | Normal | 223500.0 |
3 | 4 | 70 | RL | 60.0 | 9550 | Pave | IR1 | Lvl | AllPub | Corner | Gtl | Crawfor | Norm | Norm | 1Fam | 2Story | 7 | 5 | 1915 | 1970 | Gable | CompShg | Wd Sdng | Wd Shng | None | 0.0 | TA | TA | BrkTil | TA | Gd | No | ALQ | 216.0 | Unf | 0.0 | 540.0 | 756.0 | GasA | Gd | Y | SBrkr | 961 | 756 | 0 | 1717 | 1.0 | 0.0 | 1 | 0 | 3 | 1 | Gd | 7 | Typ | 1 | Gd | Detchd | 1998.0 | Unf | 3.0 | 642.0 | TA | TA | Y | 0 | 35 | 272 | 0 | 0 | 0 | 0 | 2 | 2006 | WD | Abnorml | 140000.0 |
4 | 5 | 60 | RL | 84.0 | 14260 | Pave | IR1 | Lvl | AllPub | FR2 | Gtl | NoRidge | Norm | Norm | 1Fam | 2Story | 8 | 5 | 2000 | 2000 | Gable | CompShg | VinylSd | VinylSd | BrkFace | 350.0 | Gd | TA | PConc | Gd | TA | Av | GLQ | 655.0 | Unf | 0.0 | 490.0 | 1145.0 | GasA | Ex | Y | SBrkr | 1145 | 1053 | 0 | 2198 | 1.0 | 0.0 | 2 | 1 | 4 | 1 | Gd | 9 | Typ | 1 | TA | Attchd | 2000.0 | RFn | 3.0 | 836.0 | TA | TA | Y | 192 | 84 | 0 | 0 | 0 | 0 | 0 | 12 | 2008 | WD | Normal | 250000.0 |
类别数据处理
序号编码
通常用来处理类别间具有大小关系的数据,比如成绩(高中低)
独热编码
通常用于处理类别间不具有大小关系的特征,比如血型(A型血、B型血、AB型血、O型血)
提示
- (1)在独热编码下,特征向量只有某一维取值为1,其余值均为0,因此可以利用向量的稀疏来节省空间
- (2)如果类别型的唯一类别元素较多,可能会造成维度灾难,因此需要利用特征选择来降低维度。
import copy
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
def data_class_processing(cls, data, columns):
for column in columns:
if cls == 'ohe':
ohe_data = pd.get_dummies(data[column], prefix=column)
data.drop(column, axis=1, inplace=True)
data = pd.concat([ohe_data, data], axis=1)
if cls == 'label':
le = LabelEncoder()
data[column] = le.fit_transform(data[column])
return data
columns = [
'MSSubClass', 'MSZoning', 'Street', 'LotShape', 'LandContour',
'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1',
'Condition2', 'BldgType', 'HouseStyle',
'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond',
'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC',
'Electrical',
'KitchenQual',
'Functional', 'FireplaceQu', 'GarageType',
'GarageFinish', 'GarageQual', 'GarageCond',
'PavedDrive',
'YrSold', 'SaleType', 'SaleCondition'
]
data = data_class_processing('ohe',data, columns)
# CentralAir
data['CentralAir'] = data['CentralAir'].map(lambda x: 1 if x == 'Y' else 0)
data.shape
(2919, 295)
data.head()
SaleCondition_Abnorml | SaleCondition_AdjLand | SaleCondition_Alloca | SaleCondition_Family | SaleCondition_Normal | SaleCondition_Partial | SaleType_COD | SaleType_CWD | SaleType_Con | SaleType_ConLD | SaleType_ConLI | SaleType_ConLw | SaleType_New | SaleType_Oth | SaleType_WD | YrSold_2006 | YrSold_2007 | YrSold_2008 | YrSold_2009 | YrSold_2010 | PavedDrive_N | PavedDrive_P | PavedDrive_Y | GarageCond_Ex | GarageCond_Fa | GarageCond_Gd | GarageCond_Po | GarageCond_TA | GarageQual_Ex | GarageQual_Fa | GarageQual_Gd | GarageQual_Po | GarageQual_TA | GarageFinish_Fin | GarageFinish_RFn | GarageFinish_Unf | GarageType_2Types | GarageType_Attchd | GarageType_Basment | GarageType_BuiltIn | GarageType_CarPort | GarageType_Detchd | FireplaceQu_Ex | FireplaceQu_Fa | FireplaceQu_Gd | FireplaceQu_Po | FireplaceQu_TA | Functional_Maj1 | Functional_Maj2 | Functional_Min1 | Functional_Min2 | Functional_Mod | Functional_Sev | Functional_Typ | KitchenQual_Ex | KitchenQual_Fa | KitchenQual_Gd | KitchenQual_TA | Electrical_FuseA | Electrical_FuseF | Electrical_FuseP | Electrical_Mix | Electrical_SBrkr | HeatingQC_Ex | HeatingQC_Fa | HeatingQC_Gd | HeatingQC_Po | HeatingQC_TA | Heating_Floor | Heating_GasA | Heating_GasW | Heating_Grav | Heating_OthW | Heating_Wall | BsmtFinType2_ALQ | BsmtFinType2_BLQ | BsmtFinType2_GLQ | BsmtFinType2_LwQ | BsmtFinType2_Rec | BsmtFinType2_Unf | BsmtFinType1_ALQ | BsmtFinType1_BLQ | BsmtFinType1_GLQ | BsmtFinType1_LwQ | BsmtFinType1_Rec | BsmtFinType1_Unf | BsmtExposure_Av | BsmtExposure_Gd | BsmtExposure_Mn | BsmtExposure_No | BsmtCond_Fa | BsmtCond_Gd | BsmtCond_Po | BsmtCond_TA | BsmtQual_Ex | BsmtQual_Fa | BsmtQual_Gd | BsmtQual_TA | Foundation_BrkTil | Foundation_CBlock | Foundation_PConc | Foundation_Slab | Foundation_Stone | Foundation_Wood | ExterCond_Ex | ExterCond_Fa | ExterCond_Gd | ExterCond_Po | ExterCond_TA | ExterQual_Ex | ExterQual_Fa | ExterQual_Gd | ExterQual_TA | MasVnrType_BrkCmn | MasVnrType_BrkFace | MasVnrType_None | MasVnrType_Stone | Exterior2nd_AsbShng | Exterior2nd_AsphShn | Exterior2nd_Brk Cmn | Exterior2nd_BrkFace | Exterior2nd_CBlock | Exterior2nd_CmentBd | Exterior2nd_HdBoard | Exterior2nd_ImStucc | Exterior2nd_MetalSd | Exterior2nd_Other | Exterior2nd_Plywood | Exterior2nd_Stone | Exterior2nd_Stucco | Exterior2nd_VinylSd | Exterior2nd_Wd Sdng | Exterior2nd_Wd Shng | Exterior1st_AsbShng | Exterior1st_AsphShn | Exterior1st_BrkComm | Exterior1st_BrkFace | Exterior1st_CBlock | Exterior1st_CemntBd | Exterior1st_HdBoard | Exterior1st_ImStucc | Exterior1st_MetalSd | Exterior1st_Plywood | Exterior1st_Stone | Exterior1st_Stucco | Exterior1st_VinylSd | Exterior1st_Wd Sdng | Exterior1st_WdShing | RoofMatl_ClyTile | RoofMatl_CompShg | RoofMatl_Membran | RoofMatl_Metal | RoofMatl_Roll | RoofMatl_Tar&Grv | RoofMatl_WdShake | RoofMatl_WdShngl | RoofStyle_Flat | RoofStyle_Gable | RoofStyle_Gambrel | RoofStyle_Hip | RoofStyle_Mansard | RoofStyle_Shed | HouseStyle_1.5Fin | HouseStyle_1.5Unf | HouseStyle_1Story | HouseStyle_2.5Fin | HouseStyle_2.5Unf | HouseStyle_2Story | HouseStyle_SFoyer | HouseStyle_SLvl | BldgType_1Fam | BldgType_2fmCon | BldgType_Duplex | BldgType_Twnhs | BldgType_TwnhsE | Condition2_Artery | Condition2_Feedr | Condition2_Norm | Condition2_PosA | Condition2_PosN | Condition2_RRAe | Condition2_RRAn | Condition2_RRNn | Condition1_Artery | Condition1_Feedr | Condition1_Norm | Condition1_PosA | Condition1_PosN | Condition1_RRAe | Condition1_RRAn | Condition1_RRNe | Condition1_RRNn | Neighborhood_Blmngtn | Neighborhood_Blueste | Neighborhood_BrDale | Neighborhood_BrkSide | Neighborhood_ClearCr | Neighborhood_CollgCr | Neighborhood_Crawfor | Neighborhood_Edwards | Neighborhood_Gilbert | Neighborhood_IDOTRR | Neighborhood_MeadowV | Neighborhood_Mitchel | Neighborhood_NAmes | Neighborhood_NPkVill | Neighborhood_NWAmes | Neighborhood_NoRidge | Neighborhood_NridgHt | Neighborhood_OldTown | Neighborhood_SWISU | Neighborhood_Sawyer | Neighborhood_SawyerW | Neighborhood_Somerst | Neighborhood_StoneBr | Neighborhood_Timber | Neighborhood_Veenker | LandSlope_Gtl | LandSlope_Mod | LandSlope_Sev | LotConfig_Corner | LotConfig_CulDSac | LotConfig_FR2 | LotConfig_FR3 | LotConfig_Inside | Utilities_AllPub | Utilities_NoSeWa | LandContour_Bnk | LandContour_HLS | LandContour_Low | LandContour_Lvl | LotShape_IR1 | LotShape_IR2 | LotShape_IR3 | LotShape_Reg | Street_Grvl | Street_Pave | MSZoning_C (all) | MSZoning_FV | MSZoning_RH | MSZoning_RL | MSZoning_RM | MSSubClass_20 | MSSubClass_30 | MSSubClass_40 | MSSubClass_45 | MSSubClass_50 | MSSubClass_60 | MSSubClass_70 | MSSubClass_75 | MSSubClass_80 | MSSubClass_85 | MSSubClass_90 | MSSubClass_120 | MSSubClass_150 | MSSubClass_160 | MSSubClass_180 | MSSubClass_190 | Id | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | BsmtFinSF2 | BsmtUnfSF | TotalBsmtSF | CentralAir | 1stFlrSF | 2ndFlrSF | LowQualFinSF | GrLivArea | BsmtFullBath | BsmtHalfBath | FullBath | HalfBath | BedroomAbvGr | KitchenAbvGr | TotRmsAbvGrd | Fireplaces | GarageYrBlt | GarageCars | GarageArea | WoodDeckSF | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MiscVal | MoSold | SalePrice | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 65.0 | 8450 | 7 | 5 | 2003 | 2003 | 196.0 | 706.0 | 0.0 | 150.0 | 856.0 | 1 | 856 | 854 | 0 | 1710 | 1.0 | 0.0 | 2 | 1 | 3 | 1 | 8 | 0 | 2003.0 | 2.0 | 548.0 | 0 | 61 | 0 | 0 | 0 | 0 | 0 | 2 | 208500.0 |
1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 80.0 | 9600 | 6 | 8 | 1976 | 1976 | 0.0 | 978.0 | 0.0 | 284.0 | 1262.0 | 1 | 1262 | 0 | 0 | 1262 | 0.0 | 1.0 | 2 | 0 | 3 | 1 | 6 | 1 | 1976.0 | 2.0 | 460.0 | 298 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 181500.0 |
2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 68.0 | 11250 | 7 | 5 | 2001 | 2002 | 162.0 | 486.0 | 0.0 | 434.0 | 920.0 | 1 | 920 | 866 | 0 | 1786 | 1.0 | 0.0 | 2 | 1 | 3 | 1 | 6 | 1 | 2001.0 | 2.0 | 608.0 | 0 | 42 | 0 | 0 | 0 | 0 | 0 | 9 | 223500.0 |
3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 60.0 | 9550 | 7 | 5 | 1915 | 1970 | 0.0 | 216.0 | 0.0 | 540.0 | 756.0 | 1 | 961 | 756 | 0 | 1717 | 1.0 | 0.0 | 1 | 0 | 3 | 1 | 7 | 1 | 1998.0 | 3.0 | 642.0 | 0 | 35 | 272 | 0 | 0 | 0 | 0 | 2 | 140000.0 |
4 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 84.0 | 14260 | 8 | 5 | 2000 | 2000 | 350.0 | 655.0 | 0.0 | 490.0 | 1145.0 | 1 | 1145 | 1053 | 0 | 2198 | 1.0 | 0.0 | 2 | 1 | 4 | 1 | 9 | 1 | 2000.0 | 3.0 | 836.0 | 192 | 84 | 0 | 0 | 0 | 0 | 0 | 12 | 250000.0 |
缺失值处理
missing_data(data)
Total | Percent | |
---|---|---|
SalePrice | 1459 | 49.982871 |
LotFrontage | 486 | 16.649538 |
GarageYrBlt | 159 | 5.447071 |
MasVnrArea | 23 | 0.787941 |
BsmtFullBath | 2 | 0.068517 |
BsmtHalfBath | 2 | 0.068517 |
GarageCars | 1 | 0.034258 |
BsmtFinSF2 | 1 | 0.034258 |
BsmtFinSF1 | 1 | 0.034258 |
BsmtUnfSF | 1 | 0.034258 |
TotalBsmtSF | 1 | 0.034258 |
GarageArea | 1 | 0.034258 |
BsmtQual_Gd | 0 | 0.000000 |
BsmtQual_TA | 0 | 0.000000 |
Foundation_BrkTil | 0 | 0.000000 |
BsmtQual_Ex | 0 | 0.000000 |
BsmtCond_TA | 0 | 0.000000 |
BsmtCond_Po | 0 | 0.000000 |
Foundation_CBlock | 0 | 0.000000 |
Foundation_PConc | 0 | 0.000000 |
BsmtCond_Gd | 0 | 0.000000 |
BsmtCond_Fa | 0 | 0.000000 |
Foundation_Slab | 0 | 0.000000 |
Foundation_Stone | 0 | 0.000000 |
Foundation_Wood | 0 | 0.000000 |
BsmtQual_Fa | 0 | 0.000000 |
BsmtExposure_Mn | 0 | 0.000000 |
BsmtExposure_No | 0 | 0.000000 |
BsmtFinType2_Unf | 0 | 0.000000 |
Heating_OthW | 0 | 0.000000 |
Heating_Wall | 0 | 0.000000 |
BsmtFinType2_ALQ | 0 | 0.000000 |
BsmtFinType2_BLQ | 0 | 0.000000 |
BsmtFinType2_GLQ | 0 | 0.000000 |
BsmtFinType2_LwQ | 0 | 0.000000 |
BsmtFinType2_Rec | 0 | 0.000000 |
BsmtFinType1_ALQ | 0 | 0.000000 |
ExterCond_Fa | 0 | 0.000000 |
BsmtFinType1_BLQ | 0 | 0.000000 |
BsmtFinType1_GLQ | 0 | 0.000000 |
BsmtFinType1_LwQ | 0 | 0.000000 |
BsmtFinType1_Rec | 0 | 0.000000 |
BsmtFinType1_Unf | 0 | 0.000000 |
BsmtExposure_Av | 0 | 0.000000 |
BsmtExposure_Gd | 0 | 0.000000 |
ExterCond_Ex | 0 | 0.000000 |
ExterQual_Gd | 0 | 0.000000 |
ExterCond_Gd | 0 | 0.000000 |
Exterior2nd_Wd Shng | 0 | 0.000000 |
Exterior2nd_Other | 0 | 0.000000 |
Exterior2nd_Plywood | 0 | 0.000000 |
Exterior2nd_Stone | 0 | 0.000000 |
Exterior2nd_Stucco | 0 | 0.000000 |
Exterior2nd_VinylSd | 0 | 0.000000 |
Exterior2nd_Wd Sdng | 0 | 0.000000 |
Exterior1st_AsbShng | 0 | 0.000000 |
Exterior2nd_ImStucc | 0 | 0.000000 |
Exterior1st_AsphShn | 0 | 0.000000 |
Exterior1st_BrkComm | 0 | 0.000000 |
Exterior1st_BrkFace | 0 | 0.000000 |
Exterior1st_CBlock | 0 | 0.000000 |
Exterior1st_CemntBd | 0 | 0.000000 |
Exterior1st_HdBoard | 0 | 0.000000 |
Exterior2nd_MetalSd | 0 | 0.000000 |
Exterior2nd_HdBoard | 0 | 0.000000 |
ExterCond_Po | 0 | 0.000000 |
MasVnrType_BrkFace | 0 | 0.000000 |
ExterCond_TA | 0 | 0.000000 |
ExterQual_Ex | 0 | 0.000000 |
ExterQual_Fa | 0 | 0.000000 |
Heating_GasW | 0 | 0.000000 |
ExterQual_TA | 0 | 0.000000 |
MasVnrType_BrkCmn | 0 | 0.000000 |
MasVnrType_None | 0 | 0.000000 |
Exterior2nd_CmentBd | 0 | 0.000000 |
MasVnrType_Stone | 0 | 0.000000 |
Exterior2nd_AsbShng | 0 | 0.000000 |
Exterior2nd_AsphShn | 0 | 0.000000 |
Exterior2nd_Brk Cmn | 0 | 0.000000 |
Exterior2nd_BrkFace | 0 | 0.000000 |
Exterior2nd_CBlock | 0 | 0.000000 |
Heating_Grav | 0 | 0.000000 |
HeatingQC_Fa | 0 | 0.000000 |
Heating_GasA | 0 | 0.000000 |
GarageCond_Gd | 0 | 0.000000 |
YrSold_2010 | 0 | 0.000000 |
PavedDrive_N | 0 | 0.000000 |
PavedDrive_P | 0 | 0.000000 |
PavedDrive_Y | 0 | 0.000000 |
GarageCond_Ex | 0 | 0.000000 |
GarageCond_Fa | 0 | 0.000000 |
GarageCond_Po | 0 | 0.000000 |
YrSold_2008 | 0 | 0.000000 |
GarageCond_TA | 0 | 0.000000 |
GarageQual_Ex | 0 | 0.000000 |
GarageQual_Fa | 0 | 0.000000 |
GarageQual_Gd | 0 | 0.000000 |
GarageQual_Po | 0 | 0.000000 |
GarageQual_TA | 0 | 0.000000 |
YrSold_2009 | 0 | 0.000000 |
YrSold_2007 | 0 | 0.000000 |
Heating_Floor | 0 | 0.000000 |
SaleType_CWD | 0 | 0.000000 |
SaleCondition_AdjLand | 0 | 0.000000 |
SaleCondition_Alloca | 0 | 0.000000 |
SaleCondition_Family | 0 | 0.000000 |
SaleCondition_Normal | 0 | 0.000000 |
SaleCondition_Partial | 0 | 0.000000 |
SaleType_COD | 0 | 0.000000 |
SaleType_Con | 0 | 0.000000 |
YrSold_2006 | 0 | 0.000000 |
SaleType_ConLD | 0 | 0.000000 |
SaleType_ConLI | 0 | 0.000000 |
SaleType_ConLw | 0 | 0.000000 |
SaleType_New | 0 | 0.000000 |
SaleType_Oth | 0 | 0.000000 |
SaleType_WD | 0 | 0.000000 |
GarageFinish_Fin | 0 | 0.000000 |
GarageFinish_RFn | 0 | 0.000000 |
GarageFinish_Unf | 0 | 0.000000 |
Electrical_FuseP | 0 | 0.000000 |
KitchenQual_Ex | 0 | 0.000000 |
KitchenQual_Fa | 0 | 0.000000 |
KitchenQual_Gd | 0 | 0.000000 |
KitchenQual_TA | 0 | 0.000000 |
Electrical_FuseA | 0 | 0.000000 |
Electrical_FuseF | 0 | 0.000000 |
Electrical_Mix | 0 | 0.000000 |
GarageType_2Types | 0 | 0.000000 |
Electrical_SBrkr | 0 | 0.000000 |
HeatingQC_Ex | 0 | 0.000000 |
Exterior1st_MetalSd | 0 | 0.000000 |
HeatingQC_Gd | 0 | 0.000000 |
HeatingQC_Po | 0 | 0.000000 |
HeatingQC_TA | 0 | 0.000000 |
Functional_Typ | 0 | 0.000000 |
Functional_Sev | 0 | 0.000000 |
Functional_Mod | 0 | 0.000000 |
Functional_Min2 | 0 | 0.000000 |
Functional_Min1 | 0 | 0.000000 |
Functional_Maj2 | 0 | 0.000000 |
Functional_Maj1 | 0 | 0.000000 |
FireplaceQu_TA | 0 | 0.000000 |
FireplaceQu_Po | 0 | 0.000000 |
FireplaceQu_Gd | 0 | 0.000000 |
FireplaceQu_Fa | 0 | 0.000000 |
FireplaceQu_Ex | 0 | 0.000000 |
GarageType_Detchd | 0 | 0.000000 |
GarageType_CarPort | 0 | 0.000000 |
GarageType_BuiltIn | 0 | 0.000000 |
GarageType_Basment | 0 | 0.000000 |
GarageType_Attchd | 0 | 0.000000 |
Exterior1st_ImStucc | 0 | 0.000000 |
Exterior1st_WdShing | 0 | 0.000000 |
Exterior1st_Plywood | 0 | 0.000000 |
MSZoning_RH | 0 | 0.000000 |
LotShape_IR3 | 0 | 0.000000 |
LotShape_Reg | 0 | 0.000000 |
Street_Grvl | 0 | 0.000000 |
Street_Pave | 0 | 0.000000 |
MSZoning_C (all) | 0 | 0.000000 |
MSZoning_FV | 0 | 0.000000 |
MSZoning_RL | 0 | 0.000000 |
LotShape_IR1 | 0 | 0.000000 |
MSZoning_RM | 0 | 0.000000 |
MSSubClass_20 | 0 | 0.000000 |
MSSubClass_30 | 0 | 0.000000 |
MSSubClass_40 | 0 | 0.000000 |
MSSubClass_45 | 0 | 0.000000 |
MSSubClass_50 | 0 | 0.000000 |
LotShape_IR2 | 0 | 0.000000 |
LandContour_Lvl | 0 | 0.000000 |
Neighborhood_Somerst | 0 | 0.000000 |
LotConfig_CulDSac | 0 | 0.000000 |
Neighborhood_Timber | 0 | 0.000000 |
Neighborhood_Veenker | 0 | 0.000000 |
LandSlope_Gtl | 0 | 0.000000 |
LandSlope_Mod | 0 | 0.000000 |
LandSlope_Sev | 0 | 0.000000 |
LotConfig_Corner | 0 | 0.000000 |
LotConfig_FR2 | 0 | 0.000000 |
LandContour_Low | 0 | 0.000000 |
LotConfig_FR3 | 0 | 0.000000 |
LotConfig_Inside | 0 | 0.000000 |
Utilities_AllPub | 0 | 0.000000 |
Utilities_NoSeWa | 0 | 0.000000 |
LandContour_Bnk | 0 | 0.000000 |
LandContour_HLS | 0 | 0.000000 |
MSSubClass_60 | 0 | 0.000000 |
MSSubClass_70 | 0 | 0.000000 |
MSSubClass_75 | 0 | 0.000000 |
Fireplaces | 0 | 0.000000 |
GrLivArea | 0 | 0.000000 |
FullBath | 0 | 0.000000 |
HalfBath | 0 | 0.000000 |
BedroomAbvGr | 0 | 0.000000 |
KitchenAbvGr | 0 | 0.000000 |
TotRmsAbvGrd | 0 | 0.000000 |
WoodDeckSF | 0 | 0.000000 |
MSSubClass_80 | 0 | 0.000000 |
OpenPorchSF | 0 | 0.000000 |
EnclosedPorch | 0 | 0.000000 |
3SsnPorch | 0 | 0.000000 |
ScreenPorch | 0 | 0.000000 |
PoolArea | 0 | 0.000000 |
MiscVal | 0 | 0.000000 |
LowQualFinSF | 0 | 0.000000 |
2ndFlrSF | 0 | 0.000000 |
1stFlrSF | 0 | 0.000000 |
CentralAir | 0 | 0.000000 |
YearRemodAdd | 0 | 0.000000 |
YearBuilt | 0 | 0.000000 |
OverallCond | 0 | 0.000000 |
OverallQual | 0 | 0.000000 |
LotArea | 0 | 0.000000 |
Id | 0 | 0.000000 |
MSSubClass_190 | 0 | 0.000000 |
MSSubClass_180 | 0 | 0.000000 |
MSSubClass_160 | 0 | 0.000000 |
MSSubClass_150 | 0 | 0.000000 |
MSSubClass_120 | 0 | 0.000000 |
MSSubClass_90 | 0 | 0.000000 |
MSSubClass_85 | 0 | 0.000000 |
Neighborhood_StoneBr | 0 | 0.000000 |
Neighborhood_SawyerW | 0 | 0.000000 |
Exterior1st_Stone | 0 | 0.000000 |
HouseStyle_SFoyer | 0 | 0.000000 |
HouseStyle_1.5Fin | 0 | 0.000000 |
HouseStyle_1.5Unf | 0 | 0.000000 |
HouseStyle_1Story | 0 | 0.000000 |
HouseStyle_2.5Fin | 0 | 0.000000 |
HouseStyle_2.5Unf | 0 | 0.000000 |
HouseStyle_2Story | 0 | 0.000000 |
HouseStyle_SLvl | 0 | 0.000000 |
RoofStyle_Mansard | 0 | 0.000000 |
BldgType_1Fam | 0 | 0.000000 |
BldgType_2fmCon | 0 | 0.000000 |
BldgType_Duplex | 0 | 0.000000 |
BldgType_Twnhs | 0 | 0.000000 |
BldgType_TwnhsE | 0 | 0.000000 |
Condition2_Artery | 0 | 0.000000 |
RoofStyle_Shed | 0 | 0.000000 |
RoofStyle_Hip | 0 | 0.000000 |
Neighborhood_Sawyer | 0 | 0.000000 |
RoofMatl_Membran | 0 | 0.000000 |
Exterior1st_Stucco | 0 | 0.000000 |
Exterior1st_VinylSd | 0 | 0.000000 |
Exterior1st_Wd Sdng | 0 | 0.000000 |
MoSold | 0 | 0.000000 |
RoofMatl_ClyTile | 0 | 0.000000 |
RoofMatl_CompShg | 0 | 0.000000 |
RoofMatl_Metal | 0 | 0.000000 |
RoofStyle_Gambrel | 0 | 0.000000 |
RoofMatl_Roll | 0 | 0.000000 |
RoofMatl_Tar&Grv | 0 | 0.000000 |
RoofMatl_WdShake | 0 | 0.000000 |
RoofMatl_WdShngl | 0 | 0.000000 |
RoofStyle_Flat | 0 | 0.000000 |
RoofStyle_Gable | 0 | 0.000000 |
Condition2_Feedr | 0 | 0.000000 |
Condition2_Norm | 0 | 0.000000 |
Condition2_PosA | 0 | 0.000000 |
Neighborhood_Mitchel | 0 | 0.000000 |
Neighborhood_CollgCr | 0 | 0.000000 |
Neighborhood_Crawfor | 0 | 0.000000 |
Neighborhood_Edwards | 0 | 0.000000 |
Neighborhood_Gilbert | 0 | 0.000000 |
Neighborhood_IDOTRR | 0 | 0.000000 |
Neighborhood_MeadowV | 0 | 0.000000 |
Neighborhood_NAmes | 0 | 0.000000 |
Condition2_PosN | 0 | 0.000000 |
Neighborhood_NPkVill | 0 | 0.000000 |
Neighborhood_NWAmes | 0 | 0.000000 |
Neighborhood_NoRidge | 0 | 0.000000 |
Neighborhood_NridgHt | 0 | 0.000000 |
Neighborhood_OldTown | 0 | 0.000000 |
Neighborhood_SWISU | 0 | 0.000000 |
Neighborhood_ClearCr | 0 | 0.000000 |
Neighborhood_BrkSide | 0 | 0.000000 |
Neighborhood_BrDale | 0 | 0.000000 |
Neighborhood_Blueste | 0 | 0.000000 |
Neighborhood_Blmngtn | 0 | 0.000000 |
Condition1_RRNn | 0 | 0.000000 |
Condition1_RRNe | 0 | 0.000000 |
Condition1_RRAn | 0 | 0.000000 |
Condition1_RRAe | 0 | 0.000000 |
Condition1_PosN | 0 | 0.000000 |
Condition1_PosA | 0 | 0.000000 |
Condition1_Norm | 0 | 0.000000 |
Condition1_Feedr | 0 | 0.000000 |
Condition1_Artery | 0 | 0.000000 |
Condition2_RRNn | 0 | 0.000000 |
Condition2_RRAn | 0 | 0.000000 |
Condition2_RRAe | 0 | 0.000000 |
SaleCondition_Abnorml | 0 | 0.000000 |
# 众数填充
def mode_fill(df,columns):
for col in columns:
if df[col].isnull().sum() > 0: # 有缺失值就进行众数填充
print(df[col].mode()[0])
df[col].fillna(df[col].mode()[0], inplace=True)
return df
columns = ['LotFrontage', 'GarageYrBlt', 'MasVnrArea','BsmtFullBath', 'BsmtHalfBath', 'GarageCars', 'BsmtFinSF2',
'BsmtFinSF1', 'BsmtUnfSF', 'TotalBsmtSF', 'GarageArea']
data = mode_fill(data,columns)
60.0
2005.0
0.0
0.0
0.0
2.0
0.0
0.0
0.0
0.0
0.0
data.shape
(2919, 295)
data.head()
SaleCondition_Abnorml | SaleCondition_AdjLand | SaleCondition_Alloca | SaleCondition_Family | SaleCondition_Normal | SaleCondition_Partial | SaleType_COD | SaleType_CWD | SaleType_Con | SaleType_ConLD | SaleType_ConLI | SaleType_ConLw | SaleType_New | SaleType_Oth | SaleType_WD | YrSold_2006 | YrSold_2007 | YrSold_2008 | YrSold_2009 | YrSold_2010 | PavedDrive_N | PavedDrive_P | PavedDrive_Y | GarageCond_Ex | GarageCond_Fa | GarageCond_Gd | GarageCond_Po | GarageCond_TA | GarageQual_Ex | GarageQual_Fa | GarageQual_Gd | GarageQual_Po | GarageQual_TA | GarageFinish_Fin | GarageFinish_RFn | GarageFinish_Unf | GarageType_2Types | GarageType_Attchd | GarageType_Basment | GarageType_BuiltIn | GarageType_CarPort | GarageType_Detchd | FireplaceQu_Ex | FireplaceQu_Fa | FireplaceQu_Gd | FireplaceQu_Po | FireplaceQu_TA | Functional_Maj1 | Functional_Maj2 | Functional_Min1 | Functional_Min2 | Functional_Mod | Functional_Sev | Functional_Typ | KitchenQual_Ex | KitchenQual_Fa | KitchenQual_Gd | KitchenQual_TA | Electrical_FuseA | Electrical_FuseF | Electrical_FuseP | Electrical_Mix | Electrical_SBrkr | HeatingQC_Ex | HeatingQC_Fa | HeatingQC_Gd | HeatingQC_Po | HeatingQC_TA | Heating_Floor | Heating_GasA | Heating_GasW | Heating_Grav | Heating_OthW | Heating_Wall | BsmtFinType2_ALQ | BsmtFinType2_BLQ | BsmtFinType2_GLQ | BsmtFinType2_LwQ | BsmtFinType2_Rec | BsmtFinType2_Unf | BsmtFinType1_ALQ | BsmtFinType1_BLQ | BsmtFinType1_GLQ | BsmtFinType1_LwQ | BsmtFinType1_Rec | BsmtFinType1_Unf | BsmtExposure_Av | BsmtExposure_Gd | BsmtExposure_Mn | BsmtExposure_No | BsmtCond_Fa | BsmtCond_Gd | BsmtCond_Po | BsmtCond_TA | BsmtQual_Ex | BsmtQual_Fa | BsmtQual_Gd | BsmtQual_TA | Foundation_BrkTil | Foundation_CBlock | Foundation_PConc | Foundation_Slab | Foundation_Stone | Foundation_Wood | ExterCond_Ex | ExterCond_Fa | ExterCond_Gd | ExterCond_Po | ExterCond_TA | ExterQual_Ex | ExterQual_Fa | ExterQual_Gd | ExterQual_TA | MasVnrType_BrkCmn | MasVnrType_BrkFace | MasVnrType_None | MasVnrType_Stone | Exterior2nd_AsbShng | Exterior2nd_AsphShn | Exterior2nd_Brk Cmn | Exterior2nd_BrkFace | Exterior2nd_CBlock | Exterior2nd_CmentBd | Exterior2nd_HdBoard | Exterior2nd_ImStucc | Exterior2nd_MetalSd | Exterior2nd_Other | Exterior2nd_Plywood | Exterior2nd_Stone | Exterior2nd_Stucco | Exterior2nd_VinylSd | Exterior2nd_Wd Sdng | Exterior2nd_Wd Shng | Exterior1st_AsbShng | Exterior1st_AsphShn | Exterior1st_BrkComm | Exterior1st_BrkFace | Exterior1st_CBlock | Exterior1st_CemntBd | Exterior1st_HdBoard | Exterior1st_ImStucc | Exterior1st_MetalSd | Exterior1st_Plywood | Exterior1st_Stone | Exterior1st_Stucco | Exterior1st_VinylSd | Exterior1st_Wd Sdng | Exterior1st_WdShing | RoofMatl_ClyTile | RoofMatl_CompShg | RoofMatl_Membran | RoofMatl_Metal | RoofMatl_Roll | RoofMatl_Tar&Grv | RoofMatl_WdShake | RoofMatl_WdShngl | RoofStyle_Flat | RoofStyle_Gable | RoofStyle_Gambrel | RoofStyle_Hip | RoofStyle_Mansard | RoofStyle_Shed | HouseStyle_1.5Fin | HouseStyle_1.5Unf | HouseStyle_1Story | HouseStyle_2.5Fin | HouseStyle_2.5Unf | HouseStyle_2Story | HouseStyle_SFoyer | HouseStyle_SLvl | BldgType_1Fam | BldgType_2fmCon | BldgType_Duplex | BldgType_Twnhs | BldgType_TwnhsE | Condition2_Artery | Condition2_Feedr | Condition2_Norm | Condition2_PosA | Condition2_PosN | Condition2_RRAe | Condition2_RRAn | Condition2_RRNn | Condition1_Artery | Condition1_Feedr | Condition1_Norm | Condition1_PosA | Condition1_PosN | Condition1_RRAe | Condition1_RRAn | Condition1_RRNe | Condition1_RRNn | Neighborhood_Blmngtn | Neighborhood_Blueste | Neighborhood_BrDale | Neighborhood_BrkSide | Neighborhood_ClearCr | Neighborhood_CollgCr | Neighborhood_Crawfor | Neighborhood_Edwards | Neighborhood_Gilbert | Neighborhood_IDOTRR | Neighborhood_MeadowV | Neighborhood_Mitchel | Neighborhood_NAmes | Neighborhood_NPkVill | Neighborhood_NWAmes | Neighborhood_NoRidge | Neighborhood_NridgHt | Neighborhood_OldTown | Neighborhood_SWISU | Neighborhood_Sawyer | Neighborhood_SawyerW | Neighborhood_Somerst | Neighborhood_StoneBr | Neighborhood_Timber | Neighborhood_Veenker | LandSlope_Gtl | LandSlope_Mod | LandSlope_Sev | LotConfig_Corner | LotConfig_CulDSac | LotConfig_FR2 | LotConfig_FR3 | LotConfig_Inside | Utilities_AllPub | Utilities_NoSeWa | LandContour_Bnk | LandContour_HLS | LandContour_Low | LandContour_Lvl | LotShape_IR1 | LotShape_IR2 | LotShape_IR3 | LotShape_Reg | Street_Grvl | Street_Pave | MSZoning_C (all) | MSZoning_FV | MSZoning_RH | MSZoning_RL | MSZoning_RM | MSSubClass_20 | MSSubClass_30 | MSSubClass_40 | MSSubClass_45 | MSSubClass_50 | MSSubClass_60 | MSSubClass_70 | MSSubClass_75 | MSSubClass_80 | MSSubClass_85 | MSSubClass_90 | MSSubClass_120 | MSSubClass_150 | MSSubClass_160 | MSSubClass_180 | MSSubClass_190 | Id | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | BsmtFinSF2 | BsmtUnfSF | TotalBsmtSF | CentralAir | 1stFlrSF | 2ndFlrSF | LowQualFinSF | GrLivArea | BsmtFullBath | BsmtHalfBath | FullBath | HalfBath | BedroomAbvGr | KitchenAbvGr | TotRmsAbvGrd | Fireplaces | GarageYrBlt | GarageCars | GarageArea | WoodDeckSF | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MiscVal | MoSold | SalePrice | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 65.0 | 8450 | 7 | 5 | 2003 | 2003 | 196.0 | 706.0 | 0.0 | 150.0 | 856.0 | 1 | 856 | 854 | 0 | 1710 | 1.0 | 0.0 | 2 | 1 | 3 | 1 | 8 | 0 | 2003.0 | 2.0 | 548.0 | 0 | 61 | 0 | 0 | 0 | 0 | 0 | 2 | 208500.0 |
1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 80.0 | 9600 | 6 | 8 | 1976 | 1976 | 0.0 | 978.0 | 0.0 | 284.0 | 1262.0 | 1 | 1262 | 0 | 0 | 1262 | 0.0 | 1.0 | 2 | 0 | 3 | 1 | 6 | 1 | 1976.0 | 2.0 | 460.0 | 298 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 181500.0 |
2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 68.0 | 11250 | 7 | 5 | 2001 | 2002 | 162.0 | 486.0 | 0.0 | 434.0 | 920.0 | 1 | 920 | 866 | 0 | 1786 | 1.0 | 0.0 | 2 | 1 | 3 | 1 | 6 | 1 | 2001.0 | 2.0 | 608.0 | 0 | 42 | 0 | 0 | 0 | 0 | 0 | 9 | 223500.0 |
3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 60.0 | 9550 | 7 | 5 | 1915 | 1970 | 0.0 | 216.0 | 0.0 | 540.0 | 756.0 | 1 | 961 | 756 | 0 | 1717 | 1.0 | 0.0 | 1 | 0 | 3 | 1 | 7 | 1 | 1998.0 | 3.0 | 642.0 | 0 | 35 | 272 | 0 | 0 | 0 | 0 | 2 | 140000.0 |
4 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 84.0 | 14260 | 8 | 5 | 2000 | 2000 | 350.0 | 655.0 | 0.0 | 490.0 | 1145.0 | 1 | 1145 | 1053 | 0 | 2198 | 1.0 | 0.0 | 2 | 1 | 4 | 1 | 9 | 1 | 2000.0 | 3.0 | 836.0 | 192 | 84 | 0 | 0 | 0 | 0 | 0 | 12 | 250000.0 |
train = data[data['SalePrice'].notnull()]
test = data[data['SalePrice'].isnull()].drop(['SalePrice'],axis=1)
print(train.shape)
print(test.shape)
(1460, 295)
(1459, 294)
score,_ = xgb_eval(train)
last_score = score
print(last_score)
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 7.7s remaining: 11.6s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 10.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13210981084409507
run time is: 12 秒
0.13210981084409507
离群值查看
箱型图法
import numpy as np
def boxplot(data):
# 下四分位数值、中位数,上四分位数值
Q1, median, Q3 = np.percentile(data, (25, 50, 75), interpolation='midpoint')
# 四分位距
IQR = Q3 - Q1
# 内限
inner = [Q1-1.5*IQR, Q3+1.5*IQR]
# 外限
outer = [Q1-3.0*IQR, Q3+3.0*IQR]
# print('>>>内限:', inner)
# print('>>>外限:', outer)
# 过滤掉极端异常值
# print(len(data))
goodData = []
for value in data:
if (value < outer[1]) and (value > outer[0]):
goodData.append(value)
print(f"异常值个数:{len(data)-len(goodData)}")
# return f"异常值个数:{len(data)-len(goodData)}"
columns = [
'LotFrontage','LotArea','MasVnrArea','BsmtFinSF1',
'BsmtFinSF2','BsmtUnfSF','TotalBsmtSF','1stFlrSF',
'2ndFlrSF','LowQualFinSF','GrLivArea' ,'GarageArea',
'WoodDeckSF','OpenPorchSF','EnclosedPorch',
'3SsnPorch','ScreenPorch','PoolArea','MiscVal'
]
for col in columns:
print(col)
boxplot(train[col])
LotFrontage
异常值个数:16
LotArea
异常值个数:34
MasVnrArea
异常值个数:28
BsmtFinSF1
异常值个数:1
BsmtFinSF2
异常值个数:1460
BsmtUnfSF
异常值个数:0
TotalBsmtSF
异常值个数:5
1stFlrSF
异常值个数:3
2ndFlrSF
异常值个数:0
LowQualFinSF
异常值个数:1460
GrLivArea
异常值个数:4
GarageArea
异常值个数:3
WoodDeckSF
异常值个数:3
OpenPorchSF
异常值个数:18
EnclosedPorch
异常值个数:1460
3SsnPorch
异常值个数:1460
ScreenPorch
异常值个数:1460
PoolArea
异常值个数:1460
MiscVal
异常值个数:1460
先未作处理
无量纲化(xgboost不需要)
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MaxAbsScaler
def nondimensionalized(cls, data, columns):
def sigmoid(df):
x_sigmoid = []
for item in columns:
S = 1/(1+np.exp(-df[item]))
x_sigmoid.append(S)
return np.array(np.matrix(x_sigmoid).T)
def feature_importance(df):
x_sum_scaler = []
for item in columns:
S = np.sum(df[item])
FI = df[item]/S
x_sum_scaler.append(FI)
return np.array(np.matrix(x_sum_scaler).T)
if cls == 'minmax': # 区间缩放法-极差标准化
mm = MinMaxScaler()
data[columns] = mm.fit_transform(data.loc[:,columns])
if cls == 'maxabs': # 极大值标准化
ma = MaxAbsScaler()
data[columns] = ma.fit_transform(data.loc[:,columns])
if cls == 'zscore':
ss = StandardScaler()
data[columns] = ss.fit_transform(data.loc[:,columns])
if cls == 'feature_importance':
feature_importance(data)
if cls == 'sigmoid':
data[columns] = sigmoid(data)
return data
无监督离散化之分箱法
# 日期
# 19.YearBuilt, 112
# 20.YearRemodAdd 61
# 59.GarageYrBlt 98
# 等宽分箱
# cut将根据值本身来选择箱子均匀间隔,即每个箱子的间距都是相同的
columns = ['YearBuilt','YearRemodAdd', 'GarageYrBlt']
best_k = [0, 0, 0]
for col in columns:
for k in range(2, 30):
data_tmp = data.copy()
data_tmp[col] = pd.cut(data_tmp[col],k,labels=False)
train = data_tmp[data_tmp['SalePrice'].notnull()]
test = data_tmp[data_tmp['SalePrice'].isnull()].drop(['SalePrice'],axis=1)
score,_ = xgb_eval(train)
if score < last_score:
last_score = score
data = data_tmp
best_k[columns.index(col)] = k
print('********')
print(best_k)
print(last_score)
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.9s remaining: 7.4s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.6s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1325314918976797
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.8s remaining: 7.2s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.4s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.9s remaining: 7.3s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.5s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 5.2s remaining: 7.9s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.8s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.1s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.9s remaining: 7.4s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.8s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.8s remaining: 7.3s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.5s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.2s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13128651826155793
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13261864972243745
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13344455220740342
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13307043441098165
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1314590441980213
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.133694047769719
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13192230270881222
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13292568537231955
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.4s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13398849705479246
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1318105711452387
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.4s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13217414742944494
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1319704816242039
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.8s remaining: 7.2s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13324929266598454
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1309254483960009
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1309254483960009
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13092695074120808
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13092632356109551
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13093923391375495
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1309254483960009
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13094010918297302
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 5.0s remaining: 7.6s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.6s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1309254483960009
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13094073625896213
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.8s remaining: 7.2s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13092632356109551
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13093923391375495
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.8s remaining: 7.2s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1309254483960009
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1309254483960009
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1309254483960009
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1309254483960009
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13092695074120808
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1316914528478866
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1317691503724471
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13155167828308173
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13133331798520795
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.4s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13186260953000736
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13142461820170612
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13145574635885626
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1318834661627718
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.2s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1311596674312157
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13096455806523824
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13119017558334428
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13129433089210113
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13121513728294185
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13208848359797648
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13136700646533622
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.8s remaining: 7.2s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.4s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1316569403135553
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13120202438894077
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1318657365901918
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 5.1s remaining: 7.6s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.6s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13182170074446148
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13133005698117176
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.9s remaining: 7.5s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.5s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1314027454132637
run time is: 9 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.2s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13164172403740695
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13155482094502888
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13269396984932624
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.6s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13210424560599787
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13141174441143938
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.13141970000036757
run time is: 8 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.7s remaining: 7.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.2s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.1319578199368307
run time is: 8 秒
********
[3, 14, 0]
0.1309254483960009
# [1, 7, 0]
# 0.13132731648769486
# [3, 14, 0]
# 0.1309254483960009
特征构造
def xgb_feature_importance_topk(data,k):
data_tmp = data.copy()
train = data_tmp[data_tmp['SalePrice'].notnull()]
X=train.drop(['SalePrice'],axis=1)
Y=train.loc[:,'SalePrice'].values
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=666)
model = xgb.XGBRegressor(
n_jobs=-1,
random_state=666
)
param_grid = {
'learning_rate': [0.1],
'n_estimators': [100]
}
gridsearch = GridSearchCV(
model,
param_grid=param_grid,
cv=5,
verbose=3,
n_jobs=-1
)
gridsearch.fit(X_train, y_train)
model = gridsearch.best_estimator_
'''
feature_importances_方法以特征馈送给算法的顺序返回相对重要性数字。因此,为了获得前20名的功能,你会想从最到最不重要的功能,例如像这样进行排序:
'''
importances = model.feature_importances_
indices = np.argsort(-importances)[:k]
columns = train.iloc[:,indices].columns.tolist()
res = []
for col in columns:
for feat, importance in zip(train.columns, model.feature_importances_):
if col == feat:
res.append((col,importance))
return res
len(data.columns)
295
# '''
# 以获得每个功能名称的重要性,只是通过列名迭代和feature_importances在一起(它们相互映射):
# '''
# for feat, importance in zip(train.columns, model.feature_importances_):
# print( 'feature: {f}, importance: {i}'.format(f=feat, i=importance) )
xgb_feature_importance_topk(data,295)
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 3.3s remaining: 5.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 5.0s finished
[('OverallQual', 0.4092757),
('GrLivArea', 0.042585827),
('BsmtQual_Ex', 0.03662632),
('RoofMatl_ClyTile', 0.035097126),
('GarageCars', 0.025553642),
('CentralAir', 0.022178357),
('KitchenQual_TA', 0.020976413),
('Exterior1st_Stucco', 0.02032021),
('TotalBsmtSF', 0.01820988),
('MSSubClass_60', 0.016657786),
('1stFlrSF', 0.015945055),
('KitchenAbvGr', 0.0152769955),
('BsmtFinSF1', 0.014164196),
('Exterior2nd_Stucco', 0.0121664),
('2ndFlrSF', 0.011771718),
('TotRmsAbvGrd', 0.00974979),
('KitchenQual_Ex', 0.0096869),
('BsmtFinType1_GLQ', 0.008715735),
('MSZoning_RM', 0.0077215075),
('BsmtQual_Gd', 0.0075940914),
('ExterQual_Ex', 0.007450104),
('GarageQual_TA', 0.0070142536),
('Exterior1st_AsbShng', 0.006995251),
('Exterior2nd_Brk Cmn', 0.0065990016),
('LotShape_Reg', 0.0064700344),
('Fireplaces', 0.0063608997),
('KitchenQual_Gd', 0.0062793503),
('MSSubClass_30', 0.0060889646),
('GarageArea', 0.005154548),
('YearRemodAdd', 0.005096418),
('SaleType_New', 0.0050593144),
('GarageType_Attchd', 0.0045187445),
('FireplaceQu_Fa', 0.004383841),
('ExterQual_Fa', 0.0037600468),
('BsmtFinType2_LwQ', 0.003100799),
('LotArea', 0.0029817864),
('Electrical_FuseF', 0.0029267112),
('BldgType_Duplex', 0.0029150224),
('Neighborhood_SWISU', 0.0027692046),
('RoofMatl_WdShngl', 0.0025965958),
('Heating_Grav', 0.0025687595),
('LandSlope_Gtl', 0.0025014824),
('LandSlope_Mod', 0.0024780459),
('FullBath', 0.0024371848),
('BldgType_1Fam', 0.0023931647),
('Foundation_Stone', 0.0022776753),
('MSSubClass_75', 0.0022572486),
('MasVnrArea', 0.0022140164),
('SaleCondition_Family', 0.002167297),
('YrSold_2008', 0.0021614458),
('GarageType_Detchd', 0.0021610886),
('LandContour_HLS', 0.002159908),
('SaleType_WD', 0.002155278),
('Neighborhood_Crawfor', 0.002129462),
('Neighborhood_Edwards', 0.0020690016),
('HalfBath', 0.0019449288),
('LandContour_Lvl', 0.0018609057),
('Functional_Mod', 0.0018403352),
('ExterCond_Fa', 0.001830228),
('MasVnrType_BrkFace', 0.001814074),
('LotFrontage', 0.0017950683),
('ExterQual_Gd', 0.0017648138),
('Condition1_Feedr', 0.0017304313),
('SaleCondition_Partial', 0.0017095393),
('Condition1_RRAe', 0.0017053399),
('MSSubClass_20', 0.0016536457),
('HeatingQC_Fa', 0.0016335138),
('ExterQual_TA', 0.0016250247),
('OverallCond', 0.0016106549),
('Neighborhood_NAmes', 0.0016082175),
('BsmtQual_Fa', 0.0015694612),
('Exterior1st_BrkFace', 0.0015455327),
('Neighborhood_StoneBr', 0.0015435361),
('BsmtFullBath', 0.0014969024),
('BsmtExposure_No', 0.0014912919),
('Functional_Maj2', 0.0014770095),
('Heating_OthW', 0.0014321045),
('Neighborhood_Veenker', 0.00142436),
('Functional_Typ', 0.0013758234),
('Neighborhood_OldTown', 0.001347905),
('PavedDrive_Y', 0.001347674),
('EnclosedPorch', 0.00134469),
('Neighborhood_Sawyer', 0.0012952455),
('GarageYrBlt', 0.0012869029),
('RoofMatl_CompShg', 0.0011996817),
('FireplaceQu_TA', 0.001159448),
('MSZoning_RL', 0.0011549005),
('PavedDrive_N', 0.001129404),
('RoofStyle_Shed', 0.0011210005),
('BsmtExposure_Gd', 0.0010897304),
('BedroomAbvGr', 0.0010711068),
('BsmtCond_Fa', 0.0010670887),
('Neighborhood_IDOTRR', 0.0010394471),
('MasVnrType_BrkCmn', 0.0010386847),
('KitchenQual_Fa', 0.0010143467),
('OpenPorchSF', 0.0010134919),
('SaleCondition_Abnorml', 0.000997769),
('WoodDeckSF', 0.0009796731),
('LandSlope_Sev', 0.00096981646),
('MSZoning_C (all)', 0.00096615497),
('HouseStyle_SLvl', 0.00096104806),
('LandContour_Low', 0.00095283776),
('Exterior1st_Wd Sdng', 0.00093908736),
('BsmtCond_Gd', 0.00092369085),
('BsmtUnfSF', 0.0009159962),
('BsmtFinSF2', 0.000914709),
('Exterior2nd_Plywood', 0.0009035254),
('Neighborhood_CollgCr', 0.0008786999),
('BsmtQual_TA', 0.00087709253),
('PavedDrive_P', 0.0008450014),
('BsmtFinType1_Rec', 0.00083076884),
('LotConfig_CulDSac', 0.00082237995),
('Condition1_Artery', 0.00079686724),
('Id', 0.0007814562),
('Neighborhood_Timber', 0.0007689896),
('MoSold', 0.0007688444),
('HeatingQC_TA', 0.0007654603),
('Exterior2nd_MetalSd', 0.0007561838),
('GarageFinish_Fin', 0.0007506564),
('FireplaceQu_Gd', 0.0007478747),
('Neighborhood_Somerst', 0.00073150184),
('ScreenPorch', 0.00071018364),
('Exterior2nd_AsbShng', 0.00069117936),
('BldgType_2fmCon', 0.00068497774),
('Exterior2nd_Wd Sdng', 0.0006789549),
('GarageQual_Fa', 0.0006756631),
('Exterior2nd_ImStucc', 0.0006481715),
('BsmtFinType1_ALQ', 0.0006466766),
('HouseStyle_2Story', 0.00063225214),
('Exterior1st_WdShing', 0.00061583845),
('Exterior2nd_HdBoard', 0.00061482575),
('Exterior1st_HdBoard', 0.0006114065),
('HeatingQC_Ex', 0.0006030498),
('Condition1_PosN', 0.0005937698),
('Exterior2nd_VinylSd', 0.00057505973),
('FireplaceQu_Ex', 0.0005464077),
('BsmtFinType2_Unf', 0.0005114567),
('LowQualFinSF', 0.0005015399),
('BsmtCond_TA', 0.0004988435),
('LotConfig_FR3', 0.00048852694),
('Condition1_Norm', 0.00047821572),
('MSSubClass_120', 0.00046705888),
('SaleType_ConLD', 0.00045726588),
('BsmtExposure_Mn', 0.0004539239),
('BsmtFinType1_LwQ', 0.00044687753),
('GarageFinish_Unf', 0.00043614442),
('BsmtFinType2_ALQ', 0.0004010186),
('FireplaceQu_Po', 0.00038877616),
('LotConfig_FR2', 0.0003666034),
('HeatingQC_Gd', 0.0003664517),
('YearBuilt', 0.00036322788),
('Exterior1st_VinylSd', 0.0003612518),
('BsmtExposure_Av', 0.00035657448),
('HouseStyle_1Story', 0.00035240524),
('Exterior2nd_CmentBd', 0.0003379002),
('Neighborhood_BrkSide', 0.00033439198),
('SaleCondition_Normal', 0.00033135305),
('PoolArea', 0.00032846778),
('LotShape_IR1', 0.00031836148),
('LotShape_IR2', 0.0003158487),
('BsmtFinType1_BLQ', 0.00029770628),
('HouseStyle_1.5Fin', 0.0002949982),
('ExterCond_Gd', 0.00029466968),
('3SsnPorch', 0.00028137083),
('BsmtFinType1_Unf', 0.00025151562),
('MasVnrType_None', 0.0002474294),
('BsmtHalfBath', 0.00024731937),
('SaleType_COD', 0.0002446929),
('Functional_Min1', 0.00023831776),
('BsmtFinType2_GLQ', 0.0002348463),
('YrSold_2010', 0.0002297446),
('GarageCond_Fa', 0.00022557852),
('RoofMatl_Tar&Grv', 0.00020732886),
('GarageQual_Gd', 0.00020601533),
('LandContour_Bnk', 0.00020408761),
('Foundation_PConc', 0.00020276985),
('Condition1_RRAn', 0.00020187993),
('BsmtFinType2_Rec', 0.00019516733),
('GarageType_BuiltIn', 0.00018795398),
('LotConfig_Corner', 0.00017717268),
('LotConfig_Inside', 0.00017693448),
('Neighborhood_ClearCr', 0.00014557355),
('YrSold_2007', 0.0001376982),
('BldgType_Twnhs', 0.00013715278),
('BsmtFinType2_BLQ', 0.00013597694),
('Electrical_SBrkr', 0.00013098777),
('GarageType_Basment', 0.00013070596),
('YrSold_2009', 0.00012943595),
('SaleType_ConLI', 0.00012930483),
('Foundation_Slab', 0.0001262763),
('Electrical_FuseA', 0.00012433954),
('Neighborhood_NridgHt', 0.00011382785),
('GarageQual_Ex', 0.00010800753),
('Foundation_CBlock', 0.00010498048),
('Foundation_BrkTil', 8.812534e-05),
('GarageFinish_RFn', 7.479326e-05),
('Functional_Min2', 7.3838106e-05),
('Functional_Maj1', 6.6195884e-05),
('MSSubClass_50', 5.7079727e-05),
('YrSold_2006', 5.544721e-05),
('Exterior2nd_BrkFace', 4.9046714e-05),
('GarageCond_Gd', 4.6189987e-05),
('GarageCond_TA', 3.649019e-05),
('ExterCond_TA', 3.0958196e-05),
('RoofStyle_Gable', 1.6125034e-05),
('SaleCondition_Alloca', 1.4236363e-05),
('SaleType_ConLw', 1.3619546e-05),
('RoofStyle_Hip', 9.960351e-06),
('MSZoning_RH', 0.0),
('GarageCond_Po', 0.0),
('GarageCond_Ex', 0.0),
('SaleCondition_AdjLand', 0.0),
('GarageQual_Po', 0.0),
('LotShape_IR3', 0.0),
('MSSubClass_190', 0.0),
('SaleType_CWD', 0.0),
('SaleType_Con', 0.0),
('MSSubClass_160', 0.0),
('MSSubClass_150', 0.0),
('MSSubClass_90', 0.0),
('MSSubClass_85', 0.0),
('MSSubClass_80', 0.0),
('Street_Grvl', 0.0),
('MSSubClass_70', 0.0),
('Street_Pave', 0.0),
('MSSubClass_45', 0.0),
('MSSubClass_40', 0.0),
('GarageType_2Types', 0.0),
('GarageType_CarPort', 0.0),
('SaleType_Oth', 0.0),
('MSZoning_FV', 0.0),
('MSSubClass_180', 0.0),
('Foundation_Wood', 0.0),
('Utilities_AllPub', 0.0),
('HouseStyle_2.5Fin', 0.0),
('HouseStyle_1.5Unf', 0.0),
('RoofStyle_Mansard', 0.0),
('RoofStyle_Gambrel', 0.0),
('RoofStyle_Flat', 0.0),
('RoofMatl_WdShake', 0.0),
('RoofMatl_Roll', 0.0),
('RoofMatl_Metal', 0.0),
('RoofMatl_Membran', 0.0),
('MiscVal', 0.0),
('Exterior1st_Stone', 0.0),
('Exterior1st_Plywood', 0.0),
('Exterior1st_MetalSd', 0.0),
('Exterior1st_ImStucc', 0.0),
('Exterior1st_CemntBd', 0.0),
('Exterior1st_CBlock', 0.0),
('Exterior1st_BrkComm', 0.0),
('Exterior1st_AsphShn', 0.0),
('Exterior2nd_Wd Shng', 0.0),
('Exterior2nd_Stone', 0.0),
('Exterior2nd_Other', 0.0),
('Exterior2nd_CBlock', 0.0),
('Exterior2nd_AsphShn', 0.0),
('MasVnrType_Stone', 0.0),
('BsmtCond_Po', 0.0),
('ExterCond_Po', 0.0),
('ExterCond_Ex', 0.0),
('HouseStyle_2.5Unf', 0.0),
('Utilities_NoSeWa', 0.0),
('HouseStyle_SFoyer', 0.0),
('Condition2_Artery', 0.0),
('Functional_Sev', 0.0),
('Electrical_FuseP', 0.0),
('Electrical_Mix', 0.0),
('Neighborhood_SawyerW', 0.0),
('Neighborhood_NoRidge', 0.0),
('Neighborhood_NWAmes', 0.0),
('Neighborhood_NPkVill', 0.0),
('HeatingQC_Po', 0.0),
('Neighborhood_Mitchel', 0.0),
('Neighborhood_MeadowV', 0.0),
('Neighborhood_Gilbert', 0.0),
('Heating_Floor', 0.0),
('Neighborhood_BrDale', 0.0),
('Neighborhood_Blueste', 0.0),
('Neighborhood_Blmngtn', 0.0),
('Condition1_RRNn', 0.0),
('Condition1_RRNe', 0.0),
('Heating_GasW', 0.0),
('Condition1_PosA', 0.0),
('Heating_Wall', 0.0),
('Condition2_RRNn', 0.0),
('Condition2_RRAn', 0.0),
('Condition2_RRAe', 0.0),
('Condition2_PosN', 0.0),
('Condition2_PosA', 0.0),
('Condition2_Norm', 0.0),
('Condition2_Feedr', 0.0),
('BldgType_TwnhsE', 0.0),
('Heating_GasA', 0.0)]
OverallQual
- OverallQual feature_importance最高
17.OverallQual:
Rates the overall material and finish of the house 总体质量:评估房屋的整体材料和装饰
10 Very Excellent
9 Excellent
8 Very Good
7 Good
6 Above Average
5 Average
4 Below Average
3 Fair
2 Poor
1 Very Poor
单变量:
如果某个特征与目标高度相关,那么可以根据具体的情况取这个特征的统计值作为新的特征。
# 计数特征
# 统计单个变量数值次数作为新的特征
data_tmp = data.copy()
new_data = data_tmp.groupby(['OverallQual'])['OverallQual'].count().to_frame().rename(columns={'OverallQual':'OverallQual_count'}).reset_index()
data_tmp = pd.merge(data_tmp, new_data, on=['OverallQual'], how='inner')
print(f"OverallQual_count的唯一数据: {data_tmp['OverallQual_count'].unique()}")
train = data_tmp[data_tmp['SalePrice'].notnull()]
test = data_tmp[data_tmp['SalePrice'].isnull()].drop(['SalePrice'],axis=1)
score,_ = xgb_eval(train)
if score < last_score:
print('score:',score)
last_score = score
data = data_tmp
print('********')
print(last_score)
OverallQual_count的唯一数据: [600 731 342 825 107 226 31 40 4 13]
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.8s remaining: 7.2s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.3s finished
参数的最佳取值:{'learning_rate': 0.1, 'n_estimators': 100}
最佳模型得分:0.15916946763469966
run time is: 9 秒
********
0.1309254483960009
特征选择
xgboost特征重要性
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error
from sklearn.feature_selection import SelectFromModel
def xgb_select_features(data):
start = datetime.datetime.now()
train_df = copy.deepcopy(data)
X=train_df.drop(['SalePrice'],axis=1)
Y=train_df.loc[:,'SalePrice'].values
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=666)
model = xgb.XGBRegressor(
n_jobs=-1,
random_state=666
)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
score = np.sqrt(mean_squared_log_error(y_test, y_pred))
print('score is: ',score)
thresholds = np.sort(model.feature_importances_)
print(thresholds)
best_score = last_score
best_thresh = 0
best_n = 0
best_selection = 0
for thresh in thresholds:
# select features using threshold
selection = SelectFromModel(model, threshold=thresh, prefit=True)
select_X_train = selection.transform(X_train)
selection_model = XGBRegressor()
selection_model.fit(select_X_train, y_train)
select_X_test = selection.transform(X_test)
y_pred = selection_model.predict(select_X_test)
score = np.sqrt(mean_squared_log_error(y_test, y_pred))
print(f'thresh = {thresh}, n = {select_X_train.shape[1]}, score = {score}')
if score < best_score:
best_score = score
best_thresh = thresh
best_n = select_X_train.shape[1]
best_selection = selection
print('**********')
print('best_score',best_score)
print('best_thresh',best_thresh)
print('best_n',best_n)
print('best_selection',best_selection)
end = datetime.datetime.now()
print('run time is:',(end-start).seconds,'秒')
return best_score, best_selection
data_tmp = data.copy()
train = data_tmp[data_tmp['SalePrice'].notnull()]
test = data_tmp[data_tmp['SalePrice'].isnull()].drop(['SalePrice'],axis=1)
score, selection = xgb_select_features(train)
score is: 0.1634269756512917
[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 2.21226992e-06 2.89932450e-06 4.01458919e-06
4.20125571e-06 4.55453346e-06 9.04142507e-06 9.96066228e-06
1.00254647e-05 1.19890656e-05 1.24969820e-05 1.33192452e-05
1.50671058e-05 1.57984032e-05 1.64488301e-05 1.65456859e-05
1.66506961e-05 1.67086491e-05 1.68370625e-05 1.68880233e-05
2.24718078e-05 2.45410683e-05 2.50417343e-05 2.65526050e-05
2.66677962e-05 2.81739158e-05 3.15517100e-05 3.20705658e-05
3.27122434e-05 3.29928953e-05 3.29968752e-05 3.39528415e-05
3.73345647e-05 4.07940606e-05 4.34545436e-05 4.40089098e-05
4.59742078e-05 5.13398518e-05 5.29541248e-05 5.51951016e-05
5.89233132e-05 5.93937548e-05 6.13122611e-05 6.18340200e-05
6.73107279e-05 6.82628670e-05 6.88516811e-05 7.01651952e-05
7.58385213e-05 7.66053636e-05 7.91334314e-05 8.01987990e-05
8.45546747e-05 8.55193794e-05 8.72424353e-05 9.19281811e-05
9.60359830e-05 1.03984610e-04 1.07962944e-04 1.08941800e-04
1.13577124e-04 1.22257770e-04 1.34212489e-04 1.49489177e-04
1.49857922e-04 1.51855944e-04 1.52076711e-04 1.53393194e-04
1.59260744e-04 1.67861217e-04 1.76806599e-04 1.80713236e-04
1.82674266e-04 1.85362616e-04 1.86537334e-04 1.89300932e-04
1.93503409e-04 2.06448414e-04 2.08053942e-04 2.08536789e-04
2.09548874e-04 2.10550992e-04 2.15660853e-04 2.23734547e-04
2.26629345e-04 2.27951896e-04 2.30085352e-04 2.36041815e-04
2.38105218e-04 2.43984279e-04 2.44529539e-04 2.45253090e-04
2.50702025e-04 2.56657251e-04 2.64317176e-04 2.67900672e-04
2.71077937e-04 2.87270523e-04 2.95323494e-04 2.96458253e-04
3.04090703e-04 3.09348950e-04 3.11128359e-04 3.11715499e-04
3.27558402e-04 3.29893635e-04 3.30773328e-04 3.39805527e-04
3.46207613e-04 3.50556540e-04 3.55951575e-04 3.66762193e-04
3.68454348e-04 3.69566202e-04 3.73052113e-04 4.08736407e-04
4.34989692e-04 4.35515103e-04 4.49951098e-04 4.62014752e-04
4.70861385e-04 4.75046923e-04 4.77300986e-04 4.92862833e-04
4.94032749e-04 5.19220601e-04 5.50425262e-04 5.56649757e-04
5.63154230e-04 5.79251908e-04 5.94773970e-04 6.21029816e-04
6.29337505e-04 6.31644973e-04 6.36423938e-04 6.56360935e-04
6.75858755e-04 6.79097837e-04 7.00469827e-04 7.05525803e-04
7.35779875e-04 7.47704995e-04 7.81524810e-04 7.98267254e-04
8.21693102e-04 9.33263858e-04 9.45827691e-04 9.72469221e-04
9.89662833e-04 9.92853427e-04 1.01081666e-03 1.04294647e-03
1.04454008e-03 1.04972383e-03 1.05285691e-03 1.05973973e-03
1.07484148e-03 1.08457590e-03 1.10268581e-03 1.12434081e-03
1.14494795e-03 1.15139328e-03 1.17061171e-03 1.18661940e-03
1.24276371e-03 1.29680778e-03 1.31867221e-03 1.44795922e-03
1.56493811e-03 1.61495444e-03 1.65214005e-03 1.69188995e-03
1.80765823e-03 1.86003116e-03 1.90357771e-03 1.94085762e-03
1.96319749e-03 2.10982189e-03 2.21060892e-03 2.53220042e-03
2.60955282e-03 2.63344147e-03 2.63724546e-03 2.65168841e-03
2.68658251e-03 2.76857032e-03 2.82136258e-03 2.87449546e-03
2.99509475e-03 3.02256388e-03 3.12633999e-03 3.13722529e-03
3.41721484e-03 3.44115961e-03 3.52469017e-03 3.73639143e-03
3.83520126e-03 3.90244904e-03 4.27996507e-03 4.38119797e-03
4.49576136e-03 4.64812294e-03 5.13938162e-03 5.47109591e-03
5.66002959e-03 6.16141642e-03 6.84734341e-03 9.20873415e-03
1.04834912e-02 1.18552931e-02 1.25425709e-02 1.31127713e-02
1.38130784e-02 2.41148882e-02 2.83758435e-02 4.22262028e-02
4.42106090e-02 5.10912351e-02 5.58977053e-02 5.80743290e-02
6.67559952e-02 3.73314440e-01]
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 0.0, n = 294, score = 0.1634269756512917
thresh = 2.21226991925505e-06, n = 221, score = 0.1634269756512917
thresh = 2.8993244995945133e-06, n = 220, score = 0.1634269756512917
thresh = 4.01458919441211e-06, n = 219, score = 0.1635835783361062
thresh = 4.20125570599339e-06, n = 218, score = 0.1635789199023971
thresh = 4.5545334614871535e-06, n = 217, score = 0.1635813744412464
thresh = 9.041425073519349e-06, n = 216, score = 0.16361114562337073
thresh = 9.960662282537669e-06, n = 215, score = 0.16359462119264123
thresh = 1.0025464689533692e-05, n = 214, score = 0.1635937181987366
thresh = 1.1989065569650847e-05, n = 213, score = 0.16353696589123407
thresh = 1.2496981980802957e-05, n = 212, score = 0.16375139615480133
thresh = 1.331924522673944e-05, n = 211, score = 0.1637449208155299
thresh = 1.5067105778143741e-05, n = 210, score = 0.1637449208155299
thresh = 1.57984031829983e-05, n = 209, score = 0.1642321668826825
thresh = 1.6448830137960613e-05, n = 208, score = 0.16405079653996815
thresh = 1.654568586673122e-05, n = 207, score = 0.1632326678272285
thresh = 1.665069612499792e-05, n = 206, score = 0.16252760604953023
thresh = 1.6708649127394892e-05, n = 205, score = 0.16240455414022192
thresh = 1.6837062503327616e-05, n = 204, score = 0.1633090720960328
thresh = 1.6888023310457356e-05, n = 203, score = 0.16214144060679214
thresh = 2.2471807824331336e-05, n = 202, score = 0.16326741926876243
thresh = 2.454106834193226e-05, n = 201, score = 0.16326741926876243
thresh = 2.5041734261321835e-05, n = 200, score = 0.16309584143571731
thresh = 2.655260504980106e-05, n = 199, score = 0.16310408282125827
thresh = 2.6667796191759408e-05, n = 198, score = 0.16310408282125827
thresh = 2.817391577991657e-05, n = 197, score = 0.16310408282125827
thresh = 3.155170998070389e-05, n = 196, score = 0.16339547132461527
thresh = 3.2070565794128925e-05, n = 195, score = 0.16339956137815564
thresh = 3.271224341006018e-05, n = 194, score = 0.16339956137815564
thresh = 3.299289528513327e-05, n = 193, score = 0.16352218599509166
thresh = 3.299687523394823e-05, n = 192, score = 0.16353020221215883
thresh = 3.395284147700295e-05, n = 191, score = 0.16401581124232292
thresh = 3.733456469490193e-05, n = 190, score = 0.16401581124232292
thresh = 4.0794060623738915e-05, n = 189, score = 0.1633548655544686
thresh = 4.345454362919554e-05, n = 188, score = 0.1625988150476915
thresh = 4.40089097537566e-05, n = 187, score = 0.16251178915083403
thresh = 4.597420775098726e-05, n = 186, score = 0.16273098610625014
thresh = 5.133985177963041e-05, n = 185, score = 0.1623748258496059
thresh = 5.29541248397436e-05, n = 184, score = 0.16306568065478022
thresh = 5.519510159501806e-05, n = 183, score = 0.16306568065478022
thresh = 5.892331319046207e-05, n = 182, score = 0.16306545807539408
thresh = 5.9393754781922325e-05, n = 181, score = 0.1627641191475676
thresh = 6.131226109573618e-05, n = 180, score = 0.16232541907143588
thresh = 6.183402001624927e-05, n = 179, score = 0.16165375976178925
thresh = 6.731072789989412e-05, n = 178, score = 0.1616147201897199
thresh = 6.826286698924378e-05, n = 177, score = 0.16225508227129698
thresh = 6.885168113512918e-05, n = 176, score = 0.16186094233915702
thresh = 7.016519521130249e-05, n = 175, score = 0.16255273603058806
thresh = 7.583852129755542e-05, n = 174, score = 0.16255273603058806
thresh = 7.660536357434466e-05, n = 173, score = 0.16240791832631746
thresh = 7.913343142718077e-05, n = 172, score = 0.16408492230212596
thresh = 8.019879896892235e-05, n = 171, score = 0.16554639191158738
thresh = 8.45546746859327e-05, n = 170, score = 0.16412678725386642
thresh = 8.551937935408205e-05, n = 169, score = 0.16392377325903976
thresh = 8.72424352564849e-05, n = 168, score = 0.16501163023644486
thresh = 9.192818106384948e-05, n = 167, score = 0.1612071772276545
thresh = 9.603598300600424e-05, n = 166, score = 0.16168144378177213
thresh = 0.00010398461017757654, n = 165, score = 0.1608330671877611
thresh = 0.00010796294372994453, n = 164, score = 0.160868133700044
thresh = 0.00010894180013565347, n = 163, score = 0.16192426505369828
thresh = 0.00011357712355675176, n = 162, score = 0.16155774994131725
thresh = 0.00012225777027197182, n = 161, score = 0.162509940499741
thresh = 0.00013421248877421021, n = 160, score = 0.16242071573314554
thresh = 0.0001494891766924411, n = 159, score = 0.1643892302937634
thresh = 0.00014985792222432792, n = 158, score = 0.1643892302937634
thresh = 0.00015185594384092838, n = 157, score = 0.1638682109182789
thresh = 0.00015207671094685793, n = 156, score = 0.16469652920000286
thresh = 0.00015339319361373782, n = 155, score = 0.16305642519498012
thresh = 0.00015926074411254376, n = 154, score = 0.16283903028472813
thresh = 0.00016786121705081314, n = 153, score = 0.16314592703845424
thresh = 0.00017680659948382527, n = 152, score = 0.1643053028990939
thresh = 0.0001807132357498631, n = 151, score = 0.16356331612515632
thresh = 0.00018267426639795303, n = 150, score = 0.16434283180710632
thresh = 0.00018536261632107198, n = 149, score = 0.16434283180710632
thresh = 0.00018653733422979712, n = 148, score = 0.1653295125571202
thresh = 0.00018930093210656196, n = 147, score = 0.1625449962063003
thresh = 0.00019350340880919248, n = 146, score = 0.16299693193547804
thresh = 0.0002064484142465517, n = 145, score = 0.16361440676890054
thresh = 0.00020805394160561264, n = 144, score = 0.16368977688955758
thresh = 0.00020853678870480508, n = 143, score = 0.16255519616685787
thresh = 0.000209548874408938, n = 142, score = 0.1641198542763368
thresh = 0.00021055099205113947, n = 141, score = 0.16325382141903572
thresh = 0.0002156608534278348, n = 140, score = 0.16325382141903572
thresh = 0.0002237345470348373, n = 139, score = 0.16173847547448797
thresh = 0.0002266293449793011, n = 138, score = 0.1623182921433018
thresh = 0.00022795189579483122, n = 137, score = 0.16233337429418535
thresh = 0.00023008535208646208, n = 136, score = 0.16159519657109647
thresh = 0.00023604181478731334, n = 135, score = 0.163579003818478
thresh = 0.00023810521815903485, n = 134, score = 0.16061957410222374
thresh = 0.00024398427922278643, n = 133, score = 0.16596828868389774
thresh = 0.00024452953948639333, n = 132, score = 0.1657755657445966
thresh = 0.0002452530898153782, n = 131, score = 0.16395945945497026
thresh = 0.0002507020253688097, n = 130, score = 0.16450701322995906
thresh = 0.00025665725115686655, n = 129, score = 0.16367981639020554
thresh = 0.00026431717560626566, n = 128, score = 0.16473021810564228
thresh = 0.00026790067204274237, n = 127, score = 0.16396632144606818
thresh = 0.000271077937213704, n = 126, score = 0.16396632144606818
thresh = 0.0002872705226764083, n = 125, score = 0.16354736923901833
thresh = 0.0002953234943561256, n = 124, score = 0.1652204162014962
thresh = 0.00029645825270563364, n = 123, score = 0.16373418031526485
thresh = 0.0003040907031390816, n = 122, score = 0.16374166529757242
thresh = 0.0003093489503953606, n = 121, score = 0.16578630155126245
thresh = 0.0003111283585894853, n = 120, score = 0.16599253432773428
thresh = 0.00031171549926511943, n = 119, score = 0.1652697314394485
thresh = 0.0003275584022048861, n = 118, score = 0.1649785715129932
thresh = 0.0003298936353530735, n = 117, score = 0.16558838159126518
thresh = 0.0003307733277324587, n = 116, score = 0.16380566984753903
thresh = 0.0003398055268917233, n = 115, score = 0.16559102459099373
thresh = 0.0003462076128926128, n = 114, score = 0.16293950390118359
thresh = 0.0003505565400701016, n = 113, score = 0.16566089599854528
thresh = 0.00035595157532952726, n = 112, score = 0.1632106660019096
thresh = 0.000366762193152681, n = 111, score = 0.16305305978066092
thresh = 0.0003684543480630964, n = 110, score = 0.16447802792574603
thresh = 0.000369566201698035, n = 109, score = 0.16509355390365787
thresh = 0.0003730521129909903, n = 108, score = 0.16411459468982148
thresh = 0.00040873640682548285, n = 107, score = 0.16374926836482503
thresh = 0.0004349896917119622, n = 106, score = 0.16367785169581325
thresh = 0.00043551510316319764, n = 105, score = 0.1636866731052735
thresh = 0.0004499510978348553, n = 104, score = 0.1616459879131297
thresh = 0.00046201475197449327, n = 103, score = 0.16396574130177524
thresh = 0.0004708613851107657, n = 102, score = 0.1635921707647243
thresh = 0.0004750469233840704, n = 101, score = 0.1635921707647243
thresh = 0.00047730098594911397, n = 100, score = 0.16399328347802072
thresh = 0.0004928628331981599, n = 99, score = 0.16564605568682592
thresh = 0.0004940327489748597, n = 98, score = 0.16325210406804963
thresh = 0.000519220600835979, n = 97, score = 0.16261771100132597
thresh = 0.0005504252621904016, n = 96, score = 0.1634582229497553
thresh = 0.0005566497566178441, n = 95, score = 0.16179959210227152
thresh = 0.0005631542298942804, n = 94, score = 0.16451741441940848
thresh = 0.0005792519077658653, n = 93, score = 0.16380661635734156
thresh = 0.0005947739700786769, n = 92, score = 0.16174063443377779
thresh = 0.0006210298161022365, n = 91, score = 0.16405510686507763
thresh = 0.0006293375045061111, n = 90, score = 0.16201802505482488
thresh = 0.0006316449726000428, n = 89, score = 0.16227768314190222
thresh = 0.0006364239379763603, n = 88, score = 0.1613357753838816
thresh = 0.0006563609349541366, n = 87, score = 0.15489379676447132
thresh = 0.0006758587551303208, n = 86, score = 0.15996636687018856
thresh = 0.0006790978368371725, n = 85, score = 0.16409704605134612
thresh = 0.0007004698272794485, n = 84, score = 0.1611161735435722
thresh = 0.0007055258029140532, n = 83, score = 0.15680771378798877
thresh = 0.0007357798749580979, n = 82, score = 0.15924861727829498
thresh = 0.0007477049948647618, n = 81, score = 0.15728230052812234
thresh = 0.0007815248100087047, n = 80, score = 0.15719261836399
thresh = 0.000798267254140228, n = 79, score = 0.16172815389738718
thresh = 0.0008216931018978357, n = 78, score = 0.15949682699106826
thresh = 0.0009332638583146036, n = 77, score = 0.16160607772821436
thresh = 0.0009458276908844709, n = 76, score = 0.16001279746476574
thresh = 0.0009724692208692431, n = 75, score = 0.16216081247759648
thresh = 0.0009896628325805068, n = 74, score = 0.16189270681592866
thresh = 0.0009928534273058176, n = 73, score = 0.16246173173728296
thresh = 0.0010108166607096791, n = 72, score = 0.1592728379608147
thresh = 0.0010429464746266603, n = 71, score = 0.15917095371048154
thresh = 0.0010445400839671493, n = 70, score = 0.15930647656578448
thresh = 0.001049723825417459, n = 69, score = 0.15966212326617918
thresh = 0.0010528569109737873, n = 68, score = 0.16189855076065846
thresh = 0.0010597397340461612, n = 67, score = 0.16183779727966732
thresh = 0.0010748414788395166, n = 66, score = 0.16183779727966732
thresh = 0.0010845758952200413, n = 65, score = 0.16008020907150441
thresh = 0.0011026858119294047, n = 64, score = 0.16718195491600213
thresh = 0.001124340808019042, n = 63, score = 0.16039879801638024
thresh = 0.001144947949796915, n = 62, score = 0.1611256611827503
thresh = 0.0011513932840898633, n = 61, score = 0.1611256611827503
thresh = 0.0011706117074936628, n = 60, score = 0.16379329884078375
thresh = 0.0011866193963214755, n = 59, score = 0.16408328930169308
thresh = 0.0012427637120708823, n = 58, score = 0.16537973221694566
thresh = 0.0012968077789992094, n = 57, score = 0.16442913579291546
thresh = 0.0013186722062528133, n = 56, score = 0.16507222814151795
thresh = 0.0014479592209681869, n = 55, score = 0.16703247619403705
thresh = 0.0015649381093680859, n = 54, score = 0.16808817122288616
thresh = 0.0016149544389918447, n = 53, score = 0.16466868303634474
thresh = 0.0016521400539204478, n = 52, score = 0.16458397126306654
thresh = 0.001691889949142933, n = 51, score = 0.1666959750184447
thresh = 0.0018076582346111536, n = 50, score = 0.16842319933852176
thresh = 0.001860031159594655, n = 49, score = 0.1694556190927747
thresh = 0.001903577707707882, n = 48, score = 0.16781088785246723
thresh = 0.001940857619047165, n = 47, score = 0.17114455865700695
thresh = 0.00196319748647511, n = 46, score = 0.16559975270137217
thresh = 0.0021098218858242035, n = 45, score = 0.16636048839832915
thresh = 0.0022106089163571596, n = 44, score = 0.16228946266195293
thresh = 0.0025322004221379757, n = 43, score = 0.16706401646520208
thresh = 0.002609552815556526, n = 42, score = 0.16519764397156145
thresh = 0.002633441472426057, n = 41, score = 0.16357918623510773
thresh = 0.002637245459482074, n = 40, score = 0.16505931778783206
thresh = 0.0026516884099692106, n = 39, score = 0.16802911621134767
thresh = 0.0026865825057029724, n = 38, score = 0.16695704165547676
thresh = 0.002768570324406028, n = 37, score = 0.1676591145615499
thresh = 0.00282136257737875, n = 36, score = 0.16975460824029226
thresh = 0.0028744954615831375, n = 35, score = 0.16330457796774298
thresh = 0.0029950947500765324, n = 34, score = 0.17694671133982054
thresh = 0.0030225638765841722, n = 33, score = 0.17648102502023405
thresh = 0.0031263399869203568, n = 32, score = 0.18014432945641684
thresh = 0.0031372252851724625, n = 31, score = 0.17931339793706613
thresh = 0.0034172148443758488, n = 30, score = 0.17955741533055633
thresh = 0.0034411596134305, n = 29, score = 0.17655572295901728
thresh = 0.003524690167978406, n = 28, score = 0.17824446649492398
thresh = 0.003736391430720687, n = 27, score = 0.17675593365353548
thresh = 0.0038352012634277344, n = 26, score = 0.17828571170297894
thresh = 0.0039024490397423506, n = 25, score = 0.1710506549793859
thresh = 0.004279965069144964, n = 24, score = 0.1747581895578411
thresh = 0.0043811979703605175, n = 23, score = 0.17549665567733588
thresh = 0.0044957613572478294, n = 22, score = 0.17530776404500276
thresh = 0.004648122936487198, n = 21, score = 0.17766274634743445
thresh = 0.0051393816247582436, n = 20, score = 0.17815268262237638
thresh = 0.005471095908433199, n = 19, score = 0.17804171380249728
thresh = 0.005660029593855143, n = 18, score = 0.17956257680284474
thresh = 0.006161416415125132, n = 17, score = 0.18127994476276035
thresh = 0.006847343407571316, n = 16, score = 0.18390724420240004
thresh = 0.009208734147250652, n = 15, score = 0.18680562071335757
thresh = 0.010483491234481335, n = 14, score = 0.18684297119149312
thresh = 0.011855293065309525, n = 13, score = 0.18591620833137226
thresh = 0.012542570941150188, n = 12, score = 0.18655437837361516
thresh = 0.013112771324813366, n = 11, score = 0.187595422816317
thresh = 0.0138130784034729, n = 10, score = 0.18590297656149943
thresh = 0.024114888161420822, n = 9, score = 0.20589942195000205
thresh = 0.028375843539834023, n = 8, score = 0.202828827635469
thresh = 0.04222620278596878, n = 7, score = 0.21003656282444372
thresh = 0.044210609048604965, n = 6, score = 0.2185621315162222
thresh = 0.051091235131025314, n = 5, score = 0.2191857107199869
thresh = 0.055897705256938934, n = 4, score = 0.21340924911246614
thresh = 0.05807432904839516, n = 3, score = 0.213585922331518
thresh = 0.0667559951543808, n = 2, score = 0.22994426940573962
thresh = 0.37331444025039673, n = 1, score = 0.23095892562782216
**********
best_score 0.1309254483960009
best_thresh 0
best_n 0
best_selection 0
run time is: 262 秒
计算特征与目标的相关系数以及P值
# 相关系数——特征与目标变量
from sklearn.feature_selection import SelectKBest
from scipy.stats import pearsonr
def xgb_eval2(X, Y):
start = datetime.datetime.now()
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=666)
model = xgb.XGBRegressor(
n_jobs=-1,
random_state=666
)
param_grid = {
'learning_rate': [0.1],
'n_estimators': [100]
}
gridsearch = GridSearchCV(
model,
param_grid=param_grid,
cv=5,
verbose=3,
n_jobs=-1
)
gridsearch.fit(X_train, y_train)
print('best param is: ', gridsearch.best_params_)
model = gridsearch.best_estimator_
y_pred = model.predict(X_test)
score = np.sqrt(mean_squared_log_error(y_test, y_pred))
print('score is: ',score)
end = datetime.datetime.now()
print('run time is:',(end-start).seconds,'秒')
return score, model
data_tmp = data.copy()
train = data_tmp[data_tmp['SalePrice'].notnull()]
X=train.drop(['SalePrice'],axis=1)
Y=train.loc[:,'SalePrice'].values
fun = lambda X, Y: tuple(map(tuple, np.array(list(map(lambda x: pearsonr(x, Y), X.T))).T))
for ki in range(1, len(train.columns.tolist())):
sb = SelectKBest(fun, k=ki)
x_fit = sb.fit(X, Y)
x_sb = x_fit.transform(X)
X_newcolumnsname = train.iloc[:, x_fit.get_support(indices=True)].columns.tolist()
# print('>>>检验统计值(相关系数):\n', sb.scores_)
# print('\n>>>P值:\n', sb.pvalues_)
score,_ = xgb_eval2(x_sb, Y)
if score < last_score:
print('score: ',score)
print('X_newcolumnsname: ',X_newcolumnsname)
print(f'特征个数{len(X_newcolumnsname)}')
last_score = score
X_newcolumnsname.append('SalePrice')
data = data_tmp[X_newcolumnsname]
print('********')
print(last_score)
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 0.0s remaining: 0.0s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
best param is: {'learning_rate': 0.1, 'n_estimators': 100}
score is: 0.23095657084432536
run time is: 0 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 0.0s remaining: 0.1s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 0.1s finished
best param is: {'learning_rate': 0.1, 'n_estimators': 100}
score is: 0.22204959452117293
run time is: 0 秒
Fitting 5 folds for each of 1 candidates, totalling 5 fits
… …
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 3.4s remaining: 5.2s
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 5.3s finished
best param is: {'learning_rate': 0.1, 'n_estimators': 100}
score is: 0.1509751277940482
run time is: 6 秒
********
0.1309254483960009
len(data.columns)
295
train = data[data['SalePrice'].notnull()]
test = data[data['SalePrice'].isnull()]
# data.to_csv('data__after_feature_engineering.csv',index=False)
# train.to_csv('train_after_feature_engineering.csv',index=False)
# test.to_csv('test_after_feature_engineering.csv',index=False)
模型调优
确定一个固定的学习率(学习率越小,时间成本越高):
learning_rate的取值范围一般在[0.01,0.3]之间
如果时间充裕,可以把learning_rate设置的更小;
如果时间紧张,可以把learning_rate设置的更大;
1)调试n_estimators;
2)调试min_child_weight以及max_depth;
3)调试gamma;
4)调试subsample、colsample_bytree;
5)调试正则化参数:reg_alpha、reg_lambda;
# 自定义scoring
# https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
from sklearn.metrics import make_scorer
def xgb_eval(data, cv_params, other_params):
def my_error_func(y_ture, y_pred):
error = np.sqrt(mean_squared_log_error(y_ture, y_pred))
return error
my_score = make_scorer(my_error_func, greater_is_better=False)
start = datetime.datetime.now()
train_df = copy.deepcopy(data)
X_train=train_df.drop(['SalePrice'],axis=1)
y_train=train_df.loc[:,'SalePrice'].values
# X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=666)
xgb_reg = xgb.XGBRegressor(**other_params)
param_grid = {
'learning_rate': [0.1],
'n_estimators': [100]
}
gridsearch = GridSearchCV(
estimator=xgb_reg,
param_grid=cv_params,
scoring=my_score,
cv=5,
verbose=3,
n_jobs=-1
)
gridsearch.fit(X_train, y_train)
print('参数的最佳取值:{0}'.format(gridsearch.best_params_))
print('最佳模型得分:{0}'.format(-gridsearch.best_score_))
end = datetime.datetime.now()
print('run time is:',(end-start).seconds,'秒')
return -gridsearch.best_score_, gridsearch.cv_results_
调n_estimators
## 粗调
#粗调
cv_params = {
'n_estimators': np.arange(10, 1500, 50),
}
other_params = {
'learning_rate': 0.1,
'max_depth': 5,
'min_child_weight': 1,
'subsample': 0.8,
'colsample_bytree': 0.8,
'gamma': 0,
'reg_alpha': 0,
'reg_lambda': 1,
'n_jobs':-1,
'random_state':666
}
data_tmp = data.copy()
train = data_tmp[data_tmp['SalePrice'].notnull()]
score, cv_results = xgb_eval(train, cv_params, other_params)
plt.plot(np.arange(10, 1500, 50), cv_results['mean_test_score'])
plt.show()
Fitting 5 folds for each of 30 candidates, totalling 150 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 24 tasks | elapsed: 26.8s
[Parallel(n_jobs=-1)]: Done 120 tasks | elapsed: 10.4min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed: 16.2min finished
参数的最佳取值:{'n_estimators': 460}
最佳模型得分:0.1288088360602727
run time is: 977 秒
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-NUHSubOP-1615804774093)(output_273_3.png)]
# 参数的最佳取值:{'n_estimators': 460}
# 最佳模型得分:0.1288088360602727
# 细调
cv_params = {
'n_estimators': np.arange(460-50, 460+50,1),
}
other_params = {
'learning_rate': 0.1,
'max_depth': 5,
'min_child_weight': 1,
'subsample': 0.8,
'colsample_bytree': 0.8,
'gamma': 0,
'reg_alpha': 0,
'reg_lambda': 1,
'n_jobs':-1,
'random_state':666
}
data_tmp = data.copy()
train = data_tmp[data_tmp['SalePrice'].notnull()]
score, cv_results = xgb_eval(train, cv_params, other_params)
plt.plot(np.arange(460-50, 460+50,1), cv_results['mean_test_score'])
plt.show()
Fitting 5 folds for each of 100 candidates, totalling 500 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 24 tasks | elapsed: 1.6min
[Parallel(n_jobs=-1)]: Done 120 tasks | elapsed: 7.3min
[Parallel(n_jobs=-1)]: Done 280 tasks | elapsed: 16.9min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 31.1min finished
参数的最佳取值:{'n_estimators': 452}
最佳模型得分:0.12878427962473019
run time is: 1870 秒
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-IBbJsmb6-1615804774096)(output_275_3.png)]
调max_depth, min_child_weight
cv_params = {
'max_depth':range(1,10,1),
'min_child_weight':range(1,10,1)
}
other_params = {
'learning_rate': 0.1,
'n_estimators':452,
'max_depth': 5,
'min_child_weight': 1,
'subsample': 0.8,
'colsample_bytree': 0.8,
'gamma': 0,
'reg_alpha': 0,
'reg_lambda': 1,
'n_jobs':-1,
'random_state':666
}
train = data[data['SalePrice'].notnull()]
score, cv_results = xgb_eval(train, cv_params, other_params)
Fitting 5 folds for each of 81 candidates, totalling 405 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 24 tasks | elapsed: 22.9s
[Parallel(n_jobs=-1)]: Done 120 tasks | elapsed: 3.1min
[Parallel(n_jobs=-1)]: Done 280 tasks | elapsed: 12.6min
[Parallel(n_jobs=-1)]: Done 405 out of 405 | elapsed: 24.3min finished
参数的最佳取值:{'max_depth': 4, 'min_child_weight': 4}
最佳模型得分:0.12662196605686601
run time is: 1460 秒
调gamma
cv_params = {
'gamma':[i/10.0 for i in range(0,5)]
}
other_params = {
'learning_rate': 0.1,
'n_estimators':452,
'max_depth': 4,
'min_child_weight': 4,
'subsample': 0.8,
'colsample_bytree': 0.8,
'gamma': 0,
'reg_alpha': 0,
'reg_lambda': 1,
'n_jobs':-1,
'random_state':666
}
train = data[data['SalePrice'].notnull()]
score, cv_results = xgb_eval(train, cv_params, other_params)
Fitting 5 folds for each of 5 candidates, totalling 25 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 25 out of 25 | elapsed: 1.4min finished
参数的最佳取值:{'gamma': 0.0}
最佳模型得分:0.12662196605686601
run time is: 89 秒
调优subsample 和 colsample_bytree
cv_params = {
'subsample':[i/10.0 for i in range(4,10)],
'colsample_bytree':[i/10.0 for i in range(4,10)]
}
other_params = {
'learning_rate': 0.1,
'n_estimators':452,
'max_depth': 4,
'min_child_weight': 4,
'subsample': 0.8,
'colsample_bytree': 0.8,
'gamma': 0,
'reg_alpha': 0,
'reg_lambda': 1,
'n_jobs':-1,
'random_state':666
}
train = data[data['SalePrice'].notnull()]
score, cv_results = xgb_eval(train, cv_params, other_params)
Fitting 5 folds for each of 36 candidates, totalling 180 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 24 tasks | elapsed: 38.8s
[Parallel(n_jobs=-1)]: Done 120 tasks | elapsed: 4.3min
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed: 7.3min finished
参数的最佳取值:{'colsample_bytree': 0.6, 'subsample': 0.9}
最佳模型得分:0.1248652563014746
run time is: 439 秒
调优正则化参数
cv_params = {
'reg_alpha': [0.05, 0.1, 1, 2, 3, 10, 50, 100],
'reg_lambda': [0.05, 0.1, 1, 2, 3]
}
other_params = {
'learning_rate': 0.1,
'n_estimators':452,
'max_depth': 4,
'min_child_weight': 4,
'subsample': 0.9,
'colsample_bytree': 0.6,
'gamma': 0,
'reg_alpha': 0,
'reg_lambda': 1,
'n_jobs':-1,
'random_state':666
}
train = data[data['SalePrice'].notnull()]
score, cv_results = xgb_eval(train, cv_params, other_params)
Fitting 5 folds for each of 40 candidates, totalling 200 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 24 tasks | elapsed: 52.2s
[Parallel(n_jobs=-1)]: Done 120 tasks | elapsed: 4.7min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed: 7.8min finished
参数的最佳取值:{'reg_alpha': 1, 'reg_lambda': 1}
最佳模型得分:0.1246989760441339
run time is: 471 秒
##至此,XGBoost模型调参结束,确定了最优参数和模型:
best_params = {
'learning_rate': 0.1,
'n_estimators':452,
'max_depth': 4,
'min_child_weight': 4,
'subsample': 0.9,
'colsample_bytree': 0.6,
'gamma': 0,
'reg_alpha': 1,
'reg_lambda': 1,
'n_jobs':-1,
'random_state':666
}
best_xgb = xgb.XGBRegressor(**best_params)
X_train = data[data['SalePrice'].notnull()].drop(['SalePrice'],axis=1)
Y_train = data[data['SalePrice'].notnull()].loc[:,'SalePrice'].values
X_test = data[data['SalePrice'].isnull()].drop(['SalePrice'], axis=1)
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
(1460, 294)
(1460,)
(1459, 294)
模型集成(融合)
基模型xgboost,使用bagging进行融合
from sklearn.ensemble import BaggingRegressor
def my_error_func(y_ture, y_pred):
error = np.sqrt(mean_squared_log_error(y_ture, y_pred))
return error
my_score = make_scorer(my_error_func, greater_is_better=False)
regr = BaggingRegressor(
base_estimator = best_xgb,
n_jobs = -1,
random_state = 66
)
model = GridSearchCV(
estimator=regr,
param_grid={'n_estimators':np.arange(1,20,1)},
scoring=my_score,
cv=5,
verbose=3,
n_jobs=-1
)
model.fit(X_train, Y_train)
print('参数的最佳取值:{0}'.format(model.best_params_))
print('最佳模型得分:{0}'.format(-model.best_score_))
Fitting 5 folds for each of 19 candidates, totalling 95 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 24 tasks | elapsed: 2.9min
[Parallel(n_jobs=-1)]: Done 95 out of 95 | elapsed: 38.4min finished
参数的最佳取值:{'n_estimators': 19}
最佳模型得分:0.12425136280271949
model.best_estimator_
BaggingRegressor(base_estimator=XGBRegressor(base_score=None, booster=None,
colsample_bylevel=None,
colsample_bynode=None,
colsample_bytree=0.6, gamma=0,
gpu_id=None,
importance_type='gain',
interaction_constraints=None,
learning_rate=0.1,
max_delta_step=None, max_depth=4,
min_child_weight=4, missing=nan,
monotone_constraints=None,
n_estimators=452, n_jobs=-1,
num_parallel_tree=None,
random_state=666, reg_alpha=1,
reg_lambda=1,
scale_pos_weight=None,
subsample=0.9, tree_method=None,
validate_parameters=None,
verbosity=None),
n_estimators=19, n_jobs=-1, random_state=66)
y_pred = model.best_estimator_.predict(X_test)
y_pred
array([125902.945, 163661.55 , 183499.3 , ..., 162691.47 , 113986.79 ,
220476.8 ], dtype=float32)
y_df = pd.DataFrame(data=y_pred,columns=['SalePrice'])
y_df.head()
SalePrice | |
---|---|
0 | 125902.945312 |
1 | 163661.546875 |
2 | 183499.296875 |
3 | 191795.031250 |
4 | 186873.843750 |
submission = pd.concat([X_test['Id'],y_df],axis=1)
submission.head()
Id | SalePrice | |
---|---|---|
0 | 1461 | 125902.945312 |
1 | 1462 | 163661.546875 |
2 | 1463 | 183499.296875 |
3 | 1464 | 191795.031250 |
4 | 1465 | 186873.843750 |
submission.to_csv('submission.csv',index=False)
# from sklearn.metrics import mean_squared_log_error
# RMSLE = np.sqrt( mean_squared_log_error(y_true, y_pred) )
# print("The score is %.5f" % RMSLE )
# train = data[data['SalePrice'].notnull()]
# test = data[data['SalePrice'].isnull()]
train.to_csv('./xgb1/train_after_fe.csv',index=False)
test.to_csv('./xgb1/test_after_fe.csv',index=False)