房价预测A

import pandas as pd     # data wrangling
import numpy as np
import joblib
import os

from sklearn.preprocessing import PowerTransformer, PolynomialFeatures, StandardScaler, LabelEncoder      # preprocessing
from sklearn.ensemble import RandomForestRegressor      # outlier / bad-value screening
from sklearn.ensemble import IsolationForest
# Fix: import the VarianceThreshold class, not the deprecated
# sklearn.feature_selection.variance_threshold module (deprecated in 0.22,
# removed in 0.24 — the FutureWarning below was triggered by the old import).
from sklearn.feature_selection import VarianceThreshold    # feature selection
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, cross_val_score, KFold    # dataset splitting
from sklearn.linear_model import Lasso, Ridge, LinearRegression     # algorithms
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from mlxtend import regressor
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error   # scoring
from hyperopt import Trials, tpe, pyll, fmin, hp    # hyperparameter tuning
import functools
import time
import matplotlib.pyplot as plt
E:\anaconda\envs\pd\lib\site-packages\sklearn\utils\deprecation.py:144: FutureWarning: The sklearn.feature_selection.variance_threshold module is  deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.feature_selection. Anything that cannot be imported from sklearn.feature_selection is now part of the private API.
  warnings.warn(message, FutureWarning)
'''
# 注:
# 1.以下数据处理均为非先验数据处理,即行业小白不主观合并特征,只有常规数据处理。
# 2.限于时间关系,这个程序没有进行模块化和严格调参,只是为了得到较高的R2以及验证思路的正确。

# 创新点1: 发现lasso回归更偏向于提取相关特征,而随机森林更偏向提取非相关特征,set(feature(随机森林)>阈值)-set(feature(Lasso)>阈值)可以提取非相关性特征。
# 由于特征处理一般会用相关系数提取特征的方法,加上这些非相关特征有奇效。
# 创新点2:数据小的话,可以遍历,去除对分数影响大的离群点;数量大的话时间会变慢,解决办法:一个是应用分布式来计算,
# 另一个可以将数据划分N份,观察每份均值。比如150个数据,分为 15组*10个(记为a1~15) 或者 10组*15个(记为:b1~10),分别测定。
# 如果a1和b1偏离均值,说明a1前五个可能含有离群点,分别测定。
'''
# Widen pandas display limits so wide frames print without truncation.
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
pd.set_option("display.max_rows", 500)
# pd.set_option("display.height", 1000)  # row-height option not supported in this test
# Load the house-price data: training set, test features, and the sample
# submission (used below as stand-in targets for the test features).
train = pd.read_csv("E:/python/house_predict/data/train.csv")
x_test = pd.read_csv("E:/python/house_predict/data/test.csv")
y_test = pd.read_csv('E:/python/house_predict/data/sample_submission.csv')
train.shape, x_test.shape, y_test.shape  # notebook display of the three shapes
((1460, 81), (1459, 80), (1459, 2))
# Append the submission's price column to the test features so `test`
# mirrors the training frame's layout (features + SalePrice).
test = pd.concat([x_test, y_test.iloc[:, [-1]]], axis=1)
test.shape
(1459, 81)
# These integer-coded columns are really categorical labels, so cast them
# to strings before the categorical encoding step below.
str_col_list = ["MSSubClass", "OverallCond", "YrSold", "MoSold", "OverallQual"]
for name in str_col_list:
    train[name] = train[name].astype(str)
train.head()
IdMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilitiesLotConfigLandSlopeNeighborhoodCondition1Condition2BldgTypeHouseStyleOverallQualOverallCondYearBuiltYearRemodAddRoofStyleRoofMatlExterior1stExterior2ndMasVnrTypeMasVnrAreaExterQualExterCondFoundationBsmtQualBsmtCondBsmtExposureBsmtFinType1BsmtFinSF1BsmtFinType2BsmtFinSF2BsmtUnfSFTotalBsmtSFHeatingHeatingQCCentralAirElectrical1stFlrSF2ndFlrSFLowQualFinSFGrLivAreaBsmtFullBathBsmtHalfBathFullBathHalfBathBedroomAbvGrKitchenAbvGrKitchenQualTotRmsAbvGrdFunctionalFireplacesFireplaceQuGarageTypeGarageYrBltGarageFinishGarageCarsGarageAreaGarageQualGarageCondPavedDriveWoodDeckSFOpenPorchSFEnclosedPorch3SsnPorchScreenPorchPoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePrice
0160RL65.08450PaveNaNRegLvlAllPubInsideGtlCollgCrNormNorm1Fam2Story7520032003GableCompShgVinylSdVinylSdBrkFace196.0GdTAPConcGdTANoGLQ706Unf0150856GasAExYSBrkr85685401710102131Gd8Typ0NaNAttchd2003.0RFn2548TATAY0610000NaNNaNNaN022008WDNormal208500
1220RL80.09600PaveNaNRegLvlAllPubFR2GtlVeenkerFeedrNorm1Fam1Story6819761976GableCompShgMetalSdMetalSdNone0.0TATACBlockGdTAGdALQ978Unf02841262GasAExYSBrkr1262001262012031TA6Typ1TAAttchd1976.0RFn2460TATAY29800000NaNNaNNaN052007WDNormal181500
2360RL68.011250PaveNaNIR1LvlAllPubInsideGtlCollgCrNormNorm1Fam2Story7520012002GableCompShgVinylSdVinylSdBrkFace162.0GdTAPConcGdTAMnGLQ486Unf0434920GasAExYSBrkr92086601786102131Gd6Typ1TAAttchd2001.0RFn2608TATAY0420000NaNNaNNaN092008WDNormal223500
3470RL60.09550PaveNaNIR1LvlAllPubCornerGtlCrawforNormNorm1Fam2Story7519151970GableCompShgWd SdngWd ShngNone0.0TATABrkTilTAGdNoALQ216Unf0540756GasAGdYSBrkr96175601717101031Gd7Typ1GdDetchd1998.0Unf3642TATAY035272000NaNNaNNaN022006WDAbnorml140000
4560RL84.014260PaveNaNIR1LvlAllPubFR2GtlNoRidgeNormNorm1Fam2Story8520002000GableCompShgVinylSdVinylSdBrkFace350.0GdTAPConcGdTAAvGLQ655Unf04901145GasAExYSBrkr1145105302198102141Gd9Typ1TAAttchd2000.0RFn3836TATAY192840000NaNNaNNaN0122008WDNormal250000
# Per-column missing-value counts, keeping only columns that actually have
# missing entries, displayed next to each column's dtype.
null_counts = train.isnull().sum()          # compute once instead of twice
missinglist = null_counts[null_counts != 0]
pd.concat([missinglist, train.loc[:, missinglist.index].dtypes], axis=1)
01
LotFrontage259float64
Alley1369object
MasVnrType8object
MasVnrArea8float64
BsmtQual37object
BsmtCond37object
BsmtExposure38object
BsmtFinType137object
BsmtFinType238object
Electrical1object
FireplaceQu690object
GarageType81object
GarageYrBlt81float64
GarageFinish81object
GarageQual81object
GarageCond81object
PoolQC1453object
Fence1179object
MiscFeature1406object
# Split the columns that contain missing values into numeric vs. string
# groups so each group can be imputed differently below.
missing_df = pd.concat([missinglist, train.loc[:, missinglist.index].dtypes], axis=1)
dtype_col = missing_df.iloc[:, 1]
missing_int_list = missing_df.index[dtype_col != 'object'].tolist()
missing_str_list = missing_df.index[dtype_col == 'object'].tolist()
missing_int_list, missing_str_list
(['LotFrontage', 'MasVnrArea', 'GarageYrBlt'],
 ['Alley',
  'MasVnrType',
  'BsmtQual',
  'BsmtCond',
  'BsmtExposure',
  'BsmtFinType1',
  'BsmtFinType2',
  'Electrical',
  'FireplaceQu',
  'GarageType',
  'GarageFinish',
  'GarageQual',
  'GarageCond',
  'PoolQC',
  'Fence',
  'MiscFeature'])
# Impute: numeric gaps become 0 and string gaps become the literal category
# 'AN' (absent/none). NOTE(review): 0 for LotFrontage/GarageYrBlt is a
# sentinel, not a real measurement — confirm the models tolerate it.
train.loc[:, missing_int_list] = train.loc[:, missing_int_list].fillna(0)
train.loc[:, missing_str_list] = train.loc[:, missing_str_list].fillna('AN')
train.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   object 
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1460 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          1460 non-null   object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   object 
 18  OverallCond    1460 non-null   object 
 19  YearBuilt      1460 non-null   int64  
 20  YearRemodAdd   1460 non-null   int64  
 21  RoofStyle      1460 non-null   object 
 22  RoofMatl       1460 non-null   object 
 23  Exterior1st    1460 non-null   object 
 24  Exterior2nd    1460 non-null   object 
 25  MasVnrType     1460 non-null   object 
 26  MasVnrArea     1460 non-null   float64
 27  ExterQual      1460 non-null   object 
 28  ExterCond      1460 non-null   object 
 29  Foundation     1460 non-null   object 
 30  BsmtQual       1460 non-null   object 
 31  BsmtCond       1460 non-null   object 
 32  BsmtExposure   1460 non-null   object 
 33  BsmtFinType1   1460 non-null   object 
 34  BsmtFinSF1     1460 non-null   int64  
 35  BsmtFinType2   1460 non-null   object 
 36  BsmtFinSF2     1460 non-null   int64  
 37  BsmtUnfSF      1460 non-null   int64  
 38  TotalBsmtSF    1460 non-null   int64  
 39  Heating        1460 non-null   object 
 40  HeatingQC      1460 non-null   object 
 41  CentralAir     1460 non-null   object 
 42  Electrical     1460 non-null   object 
 43  1stFlrSF       1460 non-null   int64  
 44  2ndFlrSF       1460 non-null   int64  
 45  LowQualFinSF   1460 non-null   int64  
 46  GrLivArea      1460 non-null   int64  
 47  BsmtFullBath   1460 non-null   int64  
 48  BsmtHalfBath   1460 non-null   int64  
 49  FullBath       1460 non-null   int64  
 50  HalfBath       1460 non-null   int64  
 51  BedroomAbvGr   1460 non-null   int64  
 52  KitchenAbvGr   1460 non-null   int64  
 53  KitchenQual    1460 non-null   object 
 54  TotRmsAbvGrd   1460 non-null   int64  
 55  Functional     1460 non-null   object 
 56  Fireplaces     1460 non-null   int64  
 57  FireplaceQu    1460 non-null   object 
 58  GarageType     1460 non-null   object 
 59  GarageYrBlt    1460 non-null   float64
 60  GarageFinish   1460 non-null   object 
 61  GarageCars     1460 non-null   int64  
 62  GarageArea     1460 non-null   int64  
 63  GarageQual     1460 non-null   object 
 64  GarageCond     1460 non-null   object 
 65  PavedDrive     1460 non-null   object 
 66  WoodDeckSF     1460 non-null   int64  
 67  OpenPorchSF    1460 non-null   int64  
 68  EnclosedPorch  1460 non-null   int64  
 69  3SsnPorch      1460 non-null   int64  
 70  ScreenPorch    1460 non-null   int64  
 71  PoolArea       1460 non-null   int64  
 72  PoolQC         1460 non-null   object 
 73  Fence          1460 non-null   object 
 74  MiscFeature    1460 non-null   object 
 75  MiscVal        1460 non-null   int64  
 76  MoSold         1460 non-null   object 
 77  YrSold         1460 non-null   object 
 78  SaleType       1460 non-null   object 
 79  SaleCondition  1460 non-null   object 
 80  SalePrice      1460 non-null   int64  
dtypes: float64(3), int64(30), object(48)
memory usage: 924.0+ KB
rfr = RandomForestRegressor(500, n_jobs=-1, random_state=42)
train.drop(['Id'], axis=1, inplace=True)
train.shape
(1460, 80)
rfr.fit(train.iloc[:, 0:-1], train.iloc[:, [-1]])
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-20-21f43b35ac6b> in <module>
----> 1 rfr.fit(train.iloc[:, 0:-1], train.iloc[:, [-1]])


E:\anaconda\envs\pd\lib\site-packages\sklearn\ensemble\_forest.py in fit(self, X, y, sample_weight)
    293         """
    294         # Validate or convert input data
--> 295         X = check_array(X, accept_sparse="csc", dtype=DTYPE)
    296         y = check_array(y, accept_sparse='csc', ensure_2d=False, dtype=None)
    297         if sample_weight is not None:


E:\anaconda\envs\pd\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    529                     array = array.astype(dtype, casting="unsafe", copy=False)
    530                 else:
--> 531                     array = np.asarray(array, order=order, dtype=dtype)
    532             except ComplexWarning:
    533                 raise ValueError("Complex data not supported\n"


E:\anaconda\envs\pd\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
     83 
     84     """
---> 85     return array(a, dtype, copy=False, order=order)
     86 
     87 


ValueError: could not convert string to float: 'RL'
# Pearson correlation matrix — computed over the numeric columns only.
train.corr()
LotFrontageLotAreaYearBuiltYearRemodAddMasVnrAreaBsmtFinSF1BsmtFinSF2BsmtUnfSFTotalBsmtSF1stFlrSF2ndFlrSFLowQualFinSFGrLivAreaBsmtFullBathBsmtHalfBathFullBathHalfBathBedroomAbvGrKitchenAbvGrTotRmsAbvGrdFireplacesGarageYrBltGarageCarsGarageAreaWoodDeckSFOpenPorchSFEnclosedPorch3SsnPorchScreenPorchPoolAreaMiscValSalePrice
LotFrontage1.0000000.1007390.0368530.0786860.1050100.076670-0.0093120.1608290.2382740.2451810.0425490.0499810.2203470.010514-0.0278560.120548-0.0129520.1444940.0344250.2213960.0440180.0193170.1652290.201473-0.0167800.0696050.0273660.0234990.0229690.114106-0.0596060.209624
LotArea0.1007391.0000000.0142280.0137880.1033210.2141030.111170-0.0026180.2608330.2994750.0509860.0047790.2631160.1581550.0480460.1260310.0142590.119690-0.0177840.1900150.2713640.0725990.1548710.1804030.1716980.084774-0.0183400.0204230.0431600.0776720.0380680.263843
YearBuilt0.0368530.0142281.0000000.5928550.3116000.249503-0.0491070.1490400.3914520.2819860.010308-0.1837840.1990100.187599-0.0381620.4682710.242656-0.070651-0.1748000.0955890.1477160.2720290.5378500.4789540.2248800.188686-0.3872680.031355-0.0503640.004950-0.0343830.522897
YearRemodAdd0.0786860.0137880.5928551.0000000.1765290.128451-0.0677590.1811330.2910660.2403790.140024-0.0624190.2873890.119470-0.0123370.4390460.183331-0.040581-0.1495980.1917400.1125810.1463570.4206220.3716000.2057260.226298-0.1939190.045286-0.0387400.005829-0.0102860.507101
MasVnrArea0.1050100.1033210.3116000.1765291.0000000.261256-0.0713300.1138620.3600670.3398500.173800-0.0686280.3880520.0830100.0274030.2729990.1991080.102775-0.0384500.2795680.2470150.1326960.3619450.3708840.1599910.122528-0.1099070.0191440.0622480.011928-0.0295120.472614
BsmtFinSF10.0766700.2141030.2495030.1284510.2612561.000000-0.050117-0.4952510.5223960.445863-0.137079-0.0645030.2081710.6492120.0674180.0585430.004262-0.107355-0.0810070.0443160.2600110.1158430.2240540.2969700.2043060.111761-0.1023030.0264510.0620210.1404910.0035710.386420
BsmtFinSF2-0.0093120.111170-0.049107-0.067759-0.071330-0.0501171.000000-0.2092940.1048100.097117-0.0992600.014807-0.0096400.1586780.070948-0.076444-0.032148-0.015728-0.040751-0.0352270.0469210.035070-0.038264-0.0182270.0678980.0030930.036543-0.0299930.0888710.0417090.004940-0.011378
BsmtUnfSF0.160829-0.0026180.1490400.1811330.113862-0.495251-0.2092941.0000000.4153600.3179870.0044690.0281670.240257-0.422900-0.0958040.288886-0.0411180.1666430.0300860.2506470.0515750.0427200.2141750.183303-0.0053160.129005-0.0025380.020764-0.012579-0.035092-0.0238370.214479
TotalBsmtSF0.2382740.2608330.3914520.2910660.3600670.5223960.1048100.4153601.0000000.819530-0.174512-0.0332450.4548680.307351-0.0003150.323722-0.0488040.050450-0.0689010.2855730.3395190.1763590.4345850.4866650.2320190.247264-0.0954780.0373840.0844890.126053-0.0184790.613581
1stFlrSF0.2451810.2994750.2819860.2403790.3398500.4458630.0971170.3179870.8195301.000000-0.202646-0.0142410.5660240.2446710.0019560.380637-0.1199160.1274010.0681010.4095160.4105310.1666420.4393170.4897820.2354590.211671-0.0652920.0561040.0887580.131525-0.0210960.605852
2ndFlrSF0.0425490.0509860.0103080.1400240.173800-0.137079-0.0992600.004469-0.174512-0.2026461.0000000.0633530.687501-0.169494-0.0238550.4213780.6097070.5029010.0593060.6164230.1945610.0644020.1839260.1383470.0921650.2080260.061989-0.0243580.0406060.0814870.0161970.319334
LowQualFinSF0.0499810.004779-0.183784-0.062419-0.068628-0.0645030.0148070.028167-0.033245-0.0142410.0633531.0000000.134683-0.047143-0.005842-0.000710-0.0270800.1056070.0075220.131185-0.021272-0.146467-0.094480-0.067601-0.0254440.0182510.061081-0.0042960.0267990.062157-0.003793-0.025606
GrLivArea0.2203470.2631160.1990100.2873890.3880520.208171-0.0096400.2402570.4548680.5660240.6875010.1346831.0000000.034836-0.0189180.6300120.4157720.5212700.1000630.8254890.4616790.1625430.4672470.4689970.2474330.3302240.0091130.0206430.1015100.170205-0.0024160.708624
BsmtFullBath0.0105140.1581550.1875990.1194700.0830100.6492120.158678-0.4229000.3073510.244671-0.169494-0.0471430.0348361.000000-0.147871-0.064512-0.030905-0.150673-0.041503-0.0532750.1379280.0492700.1318810.1791890.1753150.067341-0.049911-0.0001060.0231480.067616-0.0230470.227122
BsmtHalfBath-0.0278560.048046-0.038162-0.0123370.0274030.0674180.070948-0.095804-0.0003150.001956-0.023855-0.005842-0.018918-0.1478711.000000-0.054536-0.0123400.046519-0.037944-0.0238360.0289760.016811-0.020891-0.0245360.040161-0.025324-0.0085550.0351140.0321210.020025-0.007367-0.016844
FullBath0.1205480.1260310.4682710.4390460.2729990.058543-0.0764440.2888860.3237220.3806370.421378-0.0007100.630012-0.064512-0.0545361.0000000.1363810.3632520.1331150.5547840.2436710.1374640.4696720.4056560.1877030.259977-0.1150930.035353-0.0081060.049604-0.0142900.560664
HalfBath-0.0129520.0142590.2426560.1833310.1991080.004262-0.032148-0.041118-0.048804-0.1199160.609707-0.0270800.415772-0.030905-0.0123400.1363811.0000000.226651-0.0682630.3434150.2036490.1171660.2191780.1635490.1080800.199740-0.095317-0.0049720.0724260.0223810.0012900.284108
BedroomAbvGr0.1444940.119690-0.070651-0.0405810.102775-0.107355-0.0157280.1666430.0504500.1274010.5029010.1056070.521270-0.1506730.0465190.3632520.2266511.0000000.1985970.6766200.107570-0.0098500.0861060.0652530.0468540.0938100.041570-0.0244780.0443000.0707030.0077670.168213
KitchenAbvGr0.034425-0.017784-0.174800-0.149598-0.038450-0.081007-0.0407510.030086-0.0689010.0681010.0593060.0075220.100063-0.041503-0.0379440.133115-0.0682630.1985971.0000000.256045-0.123936-0.157991-0.050634-0.064433-0.090130-0.0700910.037312-0.024600-0.051613-0.0145250.062341-0.135907
TotRmsAbvGrd0.2213960.1900150.0955890.1917400.2795680.044316-0.0352270.2506470.2855730.4095160.6164230.1311850.825489-0.053275-0.0238360.5547840.3434150.6766200.2560451.0000000.3261140.0956070.3622890.3378220.1659840.2341920.004151-0.0066830.0593830.0837570.0247630.533723
Fireplaces0.0440180.2713640.1477160.1125810.2470150.2600110.0469210.0515750.3395190.4105310.194561-0.0212720.4616790.1379280.0289760.2436710.2036490.107570-0.1239360.3261141.0000000.1862640.3007890.2691410.2000190.169405-0.0248220.0112570.1845300.0950740.0014090.466929
GarageYrBlt0.0193170.0725990.2720290.1463570.1326960.1158430.0350700.0427200.1763590.1666420.064402-0.1464670.1625430.0492700.0168110.1374640.117166-0.009850-0.1579910.0956070.1862641.0000000.5980050.5607830.1173050.049877-0.0768220.0294010.0614160.015858-0.0066690.261366
GarageCars0.1652290.1548710.5378500.4206220.3619450.224054-0.0382640.2141750.4345850.4393170.183926-0.0944800.4672470.131881-0.0208910.4696720.2191780.086106-0.0506340.3622890.3007890.5980051.0000000.8824750.2263420.213569-0.1514340.0357650.0504940.020934-0.0430800.640409
GarageArea0.2014730.1804030.4789540.3716000.3708840.296970-0.0182270.1833030.4866650.4897820.138347-0.0676010.4689970.179189-0.0245360.4056560.1635490.065253-0.0644330.3378220.2691410.5607830.8824751.0000000.2246660.241435-0.1217770.0350870.0514120.061047-0.0274000.623431
WoodDeckSF-0.0167800.1716980.2248800.2057260.1599910.2043060.067898-0.0053160.2320190.2354590.092165-0.0254440.2474330.1753150.0401610.1877030.1080800.046854-0.0901300.1659840.2000190.1173050.2263420.2246661.0000000.058661-0.125989-0.032771-0.0741810.073378-0.0095510.324413
OpenPorchSF0.0696050.0847740.1886860.2262980.1225280.1117610.0030930.1290050.2472640.2116710.2080260.0182510.3302240.067341-0.0253240.2599770.1997400.093810-0.0700910.2341920.1694050.0498770.2135690.2414350.0586611.000000-0.093079-0.0058420.0743040.060762-0.0185840.315856
EnclosedPorch0.027366-0.018340-0.387268-0.193919-0.109907-0.1023030.036543-0.002538-0.095478-0.0652920.0619890.0610810.009113-0.049911-0.008555-0.115093-0.0953170.0415700.0373120.004151-0.024822-0.076822-0.151434-0.121777-0.125989-0.0930791.000000-0.037305-0.0828640.0542030.018361-0.128578
3SsnPorch0.0234990.0204230.0313550.0452860.0191440.026451-0.0299930.0207640.0373840.056104-0.024358-0.0042960.020643-0.0001060.0351140.035353-0.004972-0.024478-0.024600-0.0066830.0112570.0294010.0357650.035087-0.032771-0.005842-0.0373051.000000-0.031436-0.0079920.0003540.044584
ScreenPorch0.0229690.043160-0.050364-0.0387400.0622480.0620210.088871-0.0125790.0844890.0887580.0406060.0267990.1015100.0231480.032121-0.0081060.0724260.044300-0.0516130.0593830.1845300.0614160.0504940.051412-0.0741810.074304-0.082864-0.0314361.0000000.0513070.0319460.111447
PoolArea0.1141060.0776720.0049500.0058290.0119280.1404910.041709-0.0350920.1260530.1315250.0814870.0621570.1702050.0676160.0200250.0496040.0223810.070703-0.0145250.0837570.0950740.0158580.0209340.0610470.0733780.0607620.054203-0.0079920.0513071.0000000.0296690.092404
MiscVal-0.0596060.038068-0.034383-0.010286-0.0295120.0035710.004940-0.023837-0.018479-0.0210960.016197-0.003793-0.002416-0.023047-0.007367-0.0142900.0012900.0077670.0623410.0247630.001409-0.006669-0.043080-0.027400-0.009551-0.0185840.0183610.0003540.0319460.0296691.000000-0.021190
SalePrice0.2096240.2638430.5228970.5071010.4726140.386420-0.0113780.2144790.6135810.6058520.319334-0.0256060.7086240.227122-0.0168440.5606640.2841080.168213-0.1359070.5337230.4669290.2613660.6404090.6234310.3244130.315856-0.1285780.0445840.1114470.092404-0.0211901.000000
# 31 numeric features + SalePrice -> a 32 x 32 matrix.
train.corr().shape
(32, 32)
# SalePrice column of the correlation matrix, sorted descending.
# (The original first sorted the rows by SalePrice and recomputed the whole
# expression for display — both redundant: the final sort_values fully
# determines the order, and SalePrice is the matrix's last column.)
corr_int_sr = train.corr().loc[:, 'SalePrice'].sort_values(ascending=False)
corr_int_sr
SalePrice        1.000000
GrLivArea        0.708624
GarageCars       0.640409
GarageArea       0.623431
TotalBsmtSF      0.613581
1stFlrSF         0.605852
FullBath         0.560664
TotRmsAbvGrd     0.533723
YearBuilt        0.522897
YearRemodAdd     0.507101
MasVnrArea       0.472614
Fireplaces       0.466929
BsmtFinSF1       0.386420
WoodDeckSF       0.324413
2ndFlrSF         0.319334
OpenPorchSF      0.315856
HalfBath         0.284108
LotArea          0.263843
GarageYrBlt      0.261366
BsmtFullBath     0.227122
BsmtUnfSF        0.214479
LotFrontage      0.209624
BedroomAbvGr     0.168213
ScreenPorch      0.111447
PoolArea         0.092404
3SsnPorch        0.044584
BsmtFinSF2      -0.011378
BsmtHalfBath    -0.016844
MiscVal         -0.021190
LowQualFinSF    -0.025606
EnclosedPorch   -0.128578
KitchenAbvGr    -0.135907
Name: SalePrice, dtype: float64
# Split columns by dtype: numeric vs. string (categorical).
train_int = train.loc[:, train.dtypes != 'object']
train_str = train.loc[:, train.dtypes == 'object']
# Sanity check: OverallQual categories ordered by their mean SalePrice.
list(train.loc[:, 'SalePrice'].groupby(train_str.loc[:, 'OverallQual']).mean().sort_values().index)
['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
# Target-ordered label encoding: each categorical column is mapped to integer
# codes whose order follows the category's mean SalePrice (cheapest -> 0).
train_new = train.copy()
corr_str_list = []
lbl = LabelEncoder()
for col in list(train_str.columns):
    # lbl.fit(train.loc[:, 'OverallQual'])
    # HACK: classes_ is assigned directly (instead of calling fit) to force a
    # custom, target-ordered class order. NOTE(review): some sklearn versions
    # assume classes_ is sorted inside transform — confirm the produced codes
    # really follow the intended mean-price order.
    lbl.classes_ = list(train.loc[:, 'SalePrice'].groupby(train_str.loc[:, col]).mean().sort_values().index)
    train_new.loc[:, col] = lbl.transform(train.loc[:, col])
    # Spearman rank correlation between the encoded column and log1p(SalePrice).
    corr_str_list.append(pd.concat([pd.DataFrame(lbl.transform(train.loc[:, col])), np.log1p(train.iloc[:, -1])], axis=1).corr('spearman').iloc[0, 1])
corr_str_sr = pd.Series(data=corr_str_list, index=train_str.columns)
corr_str_sr.sort_values(ascending=False)  # display, strongest first
OverallQual      0.809829
Neighborhood     0.755779
ExterQual        0.684014
BsmtQual         0.678026
KitchenQual      0.672849
GarageFinish     0.633974
GarageType       0.598814
MSSubClass       0.583130
Foundation       0.573580
FireplaceQu      0.542237
HeatingQC        0.491392
BsmtFinType1     0.454237
Exterior1st      0.435284
MasVnrType       0.433267
Exterior2nd      0.423422
MSZoning         0.422232
OverallCond      0.387181
GarageCond       0.351653
GarageQual       0.351110
BsmtExposure     0.344207
HouseStyle       0.340038
LotShape         0.321184
SaleCondition    0.320496
CentralAir       0.313286
SaleType         0.307845
Electrical       0.297578
PavedDrive       0.280602
BsmtCond         0.269474
Fence            0.209028
Condition1       0.202877
RoofStyle        0.163782
BsmtFinType2     0.155398
LandContour      0.154423
BldgType         0.151785
Functional       0.136477
ExterCond        0.130370
Alley            0.130207
Heating          0.121949
Condition2       0.107173
LotConfig        0.105029
RoofMatl         0.090497
MoSold           0.081074
MiscFeature      0.078318
PoolQC           0.058495
LandSlope        0.050310
Street           0.045814
YrSold           0.024302
Utilities        0.016710
dtype: float64
# Merge numeric (Pearson) and categorical (Spearman) correlation series into
# one ranking. NOTE(review): the two coefficients are not strictly comparable
# — treat this combined ordering as a heuristic only.
pd.concat([corr_int_sr, corr_str_sr], axis=0).sort_values(ascending=False)
SalePrice        1.000000
OverallQual      0.809829
Neighborhood     0.755779
GrLivArea        0.708624
ExterQual        0.684014
BsmtQual         0.678026
KitchenQual      0.672849
GarageCars       0.640409
GarageFinish     0.633974
GarageArea       0.623431
TotalBsmtSF      0.613581
1stFlrSF         0.605852
GarageType       0.598814
MSSubClass       0.583130
Foundation       0.573580
FullBath         0.560664
FireplaceQu      0.542237
TotRmsAbvGrd     0.533723
YearBuilt        0.522897
YearRemodAdd     0.507101
HeatingQC        0.491392
MasVnrArea       0.472614
Fireplaces       0.466929
BsmtFinType1     0.454237
Exterior1st      0.435284
MasVnrType       0.433267
Exterior2nd      0.423422
MSZoning         0.422232
OverallCond      0.387181
BsmtFinSF1       0.386420
GarageCond       0.351653
GarageQual       0.351110
BsmtExposure     0.344207
HouseStyle       0.340038
WoodDeckSF       0.324413
LotShape         0.321184
SaleCondition    0.320496
2ndFlrSF         0.319334
OpenPorchSF      0.315856
CentralAir       0.313286
SaleType         0.307845
Electrical       0.297578
HalfBath         0.284108
PavedDrive       0.280602
BsmtCond         0.269474
LotArea          0.263843
GarageYrBlt      0.261366
BsmtFullBath     0.227122
BsmtUnfSF        0.214479
LotFrontage      0.209624
Fence            0.209028
Condition1       0.202877
BedroomAbvGr     0.168213
RoofStyle        0.163782
BsmtFinType2     0.155398
LandContour      0.154423
BldgType         0.151785
Functional       0.136477
ExterCond        0.130370
Alley            0.130207
Heating          0.121949
ScreenPorch      0.111447
Condition2       0.107173
LotConfig        0.105029
PoolArea         0.092404
RoofMatl         0.090497
MoSold           0.081074
MiscFeature      0.078318
PoolQC           0.058495
LandSlope        0.050310
Street           0.045814
3SsnPorch        0.044584
YrSold           0.024302
Utilities        0.016710
BsmtFinSF2      -0.011378
BsmtHalfBath    -0.016844
MiscVal         -0.021190
LowQualFinSF    -0.025606
EnclosedPorch   -0.128578
KitchenAbvGr    -0.135907
dtype: float64
# Keep the 41 strongest entries of the combined ranking (rank 1 is SalePrice
# itself) and flip the order so SalePrice lands last — ready to be used as a
# column layout where the target is the final column.
feature_list = list(pd.concat([corr_int_sr, corr_str_sr], axis=0).sort_values(ascending=False).index[0:41])[::-1]
feature_list
['SaleType',
 'CentralAir',
 'OpenPorchSF',
 '2ndFlrSF',
 'SaleCondition',
 'LotShape',
 'WoodDeckSF',
 'HouseStyle',
 'BsmtExposure',
 'GarageQual',
 'GarageCond',
 'BsmtFinSF1',
 'OverallCond',
 'MSZoning',
 'Exterior2nd',
 'MasVnrType',
 'Exterior1st',
 'BsmtFinType1',
 'Fireplaces',
 'MasVnrArea',
 'HeatingQC',
 'YearRemodAdd',
 'YearBuilt',
 'TotRmsAbvGrd',
 'FireplaceQu',
 'FullBath',
 'Foundation',
 'MSSubClass',
 'GarageType',
 '1stFlrSF',
 'TotalBsmtSF',
 'GarageArea',
 'GarageFinish',
 'GarageCars',
 'KitchenQual',
 'BsmtQual',
 'ExterQual',
 'GrLivArea',
 'Neighborhood',
 'OverallQual',
 'SalePrice']
# Reduced training frame: the 40 selected features (encoded values taken from
# train_new) with SalePrice as the final column.
train_last = train_new.loc[:, feature_list]
train_last
SaleTypeCentralAirOpenPorchSF2ndFlrSFSaleConditionLotShapeWoodDeckSFHouseStyleBsmtExposureGarageQualGarageCondBsmtFinSF1OverallCondMSZoningExterior2ndMasVnrTypeExterior1stBsmtFinType1FireplacesMasVnrAreaHeatingQCYearRemodAddYearBuiltTotRmsAbvGrdFireplaceQuFullBathFoundationMSSubClassGarageType1stFlrSFTotalBsmtSFGarageAreaGarageFinishGarageCarsKitchenQualBsmtQualExterQualGrLivAreaNeighborhoodOverallQualSalePrice
041618544006135706731221160196.04200320038125145856856548222321710166208500
1410040298543597853514410.0419761976632211512621262460221311262205181500
241428664106235486731221161162.04200220016325145920920608222321786166223500
34135756110613521673815410.0319701915741192961756642132211717176140000
441841053411926335655731221161350.0420002000932514511451145836232322198247250000
..............................................................................................................................
14554140694400613507312111510.04200019997325145953953460221311647145175000
1456410040349513579043104942119.0219881978732211520731542500121312073135210000
14574160115240061352758313112620.042006194194239511881152252212232340176266500
145841004036652354943514600.0319961950511211510781078240112211078104142125
14594168040736513583043918200.031965196561121151256125627631122125644147500

1460 rows × 41 columns

# 80/20 hold-out split. NOTE(review): this rebinds y_test, shadowing the
# sample-submission frame loaded earlier; objects derived before this point
# are unaffected, but the name now means "held-out training targets".
X_train, X_test, y_train, y_test = train_test_split(train_last.iloc[:, 0:-1], train_last.iloc[:, [-1]], test_size=0.2, random_state=7)
lr = LinearRegression()
lr.fit(X_train, y_train)
print(lr.predict(X_test.iloc[[142], :]))  # spot-check one prediction...
print(y_test.iloc[[142], :])              # ...against its true price
print(lr.score(X_test, y_test))    # R^2 on the held-out split
print(lr.score(X_train, y_train))  # R^2 on the training split
[[127789.21407131]]
    SalePrice
99     128950
0.8449398174883179
0.8236402408131976
# L1-regularized baseline; large max_iter so coordinate descent converges.
ls = Lasso(alpha=1, max_iter=100000)
ls.fit(X_train, y_train)
print(ls.predict(X_test.iloc[[142], :]))  # same spot-check as above
print(y_test.iloc[[142], :])
print(ls.score(X_test, y_test))    # held-out R^2
print(ls.score(X_train, y_train))  # training R^2
[127780.18771703]
    SalePrice
99     128950
0.8449411977832546
0.8236402205170005
# L2-regularized baseline with a fairly strong penalty.
rg = Ridge(alpha=50)
rg.fit(X_train, y_train)
print(rg.predict(X_test.iloc[[142], :]))  # same spot-check as above
print(y_test.iloc[[142], :])
print(rg.score(X_test, y_test))    # held-out R^2
print(rg.score(X_train, y_train))  # training R^2
[[126604.3116898]]
    SalePrice
99     128950
0.8453340855508833
0.8230610246916847
# Gradient-boosted trees with default hyperparameters.
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
print(xgb.predict(X_test.iloc[[142], :]))  # same spot-check as above
print(y_test.iloc[[142], :])
print(xgb.score(X_test, y_test))    # held-out R^2
print(xgb.score(X_train, y_train))  # training R^2 — near 1.0 below, i.e. overfit
[125012.68]
    SalePrice
99     128950
0.8980734270131909
0.9997334320624469
# Refit the forest on the fully encoded frame (all 79 features) to obtain
# impurity-based importances; this succeeds where the earlier fit failed.
rfr.fit(train_new.iloc[:, 0:-1], train_new.iloc[:, -1])
RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=500, n_jobs=-1, oob_score=False,
                      random_state=42, verbose=0, warm_start=False)
# Impurity-based importance per feature, in train_new column order.
rfr.feature_importances_
array([4.02810692e-03, 9.87158518e-04, 5.80719799e-03, 1.10726065e-02,
       5.66349085e-06, 3.40831981e-04, 8.68797202e-04, 1.09590646e-03,
       1.03940738e-06, 7.77616131e-04, 4.06918767e-04, 8.89175416e-02,
       5.44486278e-04, 1.69459949e-04, 3.63927429e-04, 4.60310666e-04,
       5.27892083e-01, 2.88893586e-03, 4.78030371e-03, 6.30284045e-03,
       8.49322248e-04, 3.16173901e-04, 1.74585304e-03, 1.86844967e-03,
       1.08696824e-03, 4.68242529e-03, 3.20662289e-03, 5.49993600e-04,
       4.63504712e-04, 5.21659255e-03, 1.15031549e-03, 2.90474717e-03,
       1.12174740e-03, 2.55563048e-02, 3.60971127e-04, 5.25565579e-04,
       5.10778467e-03, 3.30246235e-02, 7.82016430e-05, 7.70467614e-04,
       1.72899855e-03, 2.09468873e-04, 2.69327020e-02, 3.60331201e-02,
       1.05300010e-04, 1.01772805e-01, 1.08479918e-03, 3.05482508e-04,
       4.48049307e-03, 8.98748530e-04, 1.90311062e-03, 3.47992453e-04,
       4.37312822e-03, 6.27560636e-03, 4.09108527e-04, 1.61260129e-03,
       2.74604972e-03, 2.35392831e-03, 4.44715688e-03, 1.67374818e-03,
       1.84101275e-02, 1.33559120e-02, 4.84571362e-04, 3.86757072e-04,
       4.28789849e-04, 4.46564334e-03, 4.28807668e-03, 8.05120185e-04,
       3.47685029e-04, 1.19347586e-03, 3.37271377e-04, 4.24336200e-04,
       4.46354910e-04, 3.06589136e-05, 4.68966462e-05, 2.26130147e-03,
       1.70978149e-03, 1.15257113e-03, 1.43195285e-03])
# Bar chart of the importances — one bar per feature, target column excluded.
plt.bar(np.arange(train_new.shape[1]-1), rfr.feature_importances_)
<BarContainer object of 79 artists>

![feature importance bar chart (external image link broken; save locally and re-upload)](output_38_1.png)

# Rank the features by forest importance and keep the top 40.
imp_sr = pd.Series(data=rfr.feature_importances_, index=train_new.columns[0:-1]).sort_values(ascending=False)
imp_sr_list = imp_sr.index[0:40]
# Rebuild the correlation-based top-41 and drop its leading entry, SalePrice.
feature_list = list(pd.concat([corr_int_sr, corr_str_sr], axis=0).sort_values(ascending=False).index[0:41])
feature_list.pop(0)
'SalePrice'
# Important-but-uncorrelated features: ranked high by the forest yet absent
# from the correlation-based top-40 list (computed once, then displayed —
# the original evaluated the set difference twice).
non_corr_imp_list = set(imp_sr_list) - set(feature_list)
non_corr_imp_list
{'BedroomAbvGr',
 'BsmtCond',
 'BsmtUnfSF',
 'GarageYrBlt',
 'LotArea',
 'LotFrontage',
 'MoSold',
 'ScreenPorch',
 'YrSold'}
# Union of forest-important and correlation-selected features (computed once,
# then displayed — the original evaluated the union twice).
corr_imp_list = set(list(imp_sr_list) + list(feature_list))
corr_imp_list
{'1stFlrSF',
 '2ndFlrSF',
 'BedroomAbvGr',
 'BsmtCond',
 'BsmtExposure',
 'BsmtFinSF1',
 'BsmtFinType1',
 'BsmtQual',
 'BsmtUnfSF',
 'CentralAir',
 'ExterQual',
 'Exterior1st',
 'Exterior2nd',
 'FireplaceQu',
 'Fireplaces',
 'Foundation',
 'FullBath',
 'GarageArea',
 'GarageCars',
 'GarageCond',
 'GarageFinish',
 'GarageQual',
 'GarageType',
 'GarageYrBlt',
 'GrLivArea',
 'HeatingQC',
 'HouseStyle',
 'KitchenQual',
 'LotArea',
 'LotFrontage',
 'LotShape',
 'MSSubClass',
 'MSZoning',
 'MasVnrArea',
 'MasVnrType',
 'MoSold',
 'Neighborhood',
 'OpenPorchSF',
 'OverallCond',
 'OverallQual',
 'SaleCondition',
 'SaleType',
 'ScreenPorch',
 'TotRmsAbvGrd',
 'TotalBsmtSF',
 'WoodDeckSF',
 'YearBuilt',
 'YearRemodAdd',
 'YrSold'}
# Restrict the training frame to the selected feature union plus the target.
# NOTE(review): iterating a set gives an arbitrary column order, so the
# column layout (and anything order-sensitive downstream) may vary run-to-run.
train_last = train_new.loc[:,list(corr_imp_list) + ['SalePrice']]
train_last
MoSoldMSZoningBsmtFinSF1GarageCondMSSubClassLotAreaWoodDeckSFGarageFinishSaleConditionSaleTypeYearBuiltGarageAreaBedroomAbvGrFoundationCentralAirBsmtFinType1Exterior1stFullBathGarageTypeGarageQualExterQualFireplacesKitchenQual1stFlrSFBsmtCondGrLivAreaHeatingQCHouseStyleLotFrontageBsmtExposureOpenPorchSFBsmtQualLotShapeMasVnrTypeOverallQualMasVnrAreaTotalBsmtSFYrSoldBsmtUnfSFNeighborhood2ndFlrSFExterior2ndTotRmsAbvGrdScreenPorchGarageCarsFireplaceQuGarageYrBltOverallCondYearRemodAddSalePrice
033706514845002442003548351611253202856317104665.01613026196.08560150168541280212003.072003208500
11397851196002982441976460321442531111262312624580.04030150.012624284200560231976.051976181500
21134865141125002442001608351611253212920317864668.02423126162.09200434168661260232001.072002223500
3332165995500114191564231145123112961417173660.013521160.0756354017756870341998.071970140000
4936555141426019224420008364516112532121145321984684.03843127350.0114504902410531290332000.072000250000
.........................................................................................................................................................
1455730514791702441999460351511253111953316474662.014030150.09534953146941270231999.072000175000
145633790511131753491441978500321492531212073320732585.0103045119.0154215891301070231978.041988210000
145713275599042024419412524316122533221188423404666.016020160.0115218771711521390141941.082006266500
1458034951197173661441950240221641531021078310783568.02020140.0107810100550111950.041996142125
14592383051199377363441965276321281532011256312563575.016820140.01256013640960111965.041965147500

1460 rows × 50 columns

# 80/20 hold-out split on the selected features; SalePrice is the last column.
X_train, X_test, y_train, y_test = train_test_split(train_last.iloc[:, 0:-1], train_last.iloc[:, [-1]], test_size=0.2, random_state=7)
# Baseline: plain linear regression on the raw (dollar-scale) target.
lr = LinearRegression()
lr.fit(X_train, y_train)
# Sanity-check one row's prediction, then report test/train R^2.
print(lr.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(lr.score(X_test, y_test))
print(lr.score(X_train, y_train))
[[128705.97633884]]
    SalePrice
99     128950
0.8519543734556843
0.8266965527930652
# Lasso on the raw target; alpha=100 because SalePrice is on a dollar scale.
ls = Lasso(alpha=100, max_iter=100000)
ls.fit(X_train, y_train)
print(ls.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(ls.score(X_test, y_test))
print(ls.score(X_train, y_train))
[128194.41905277]
    SalePrice
99     128950
0.8524312721810778
0.8264674896727493
# XGBoost with default hyperparameters; the train R^2 near 1.0 in the
# output below indicates heavy overfitting.
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
print(xgb.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(xgb.score(X_test, y_test))
print(xgb.score(X_train, y_train))
[137806.88]
    SalePrice
99     128950
0.9013816373209745
0.99986585398344
# Ridge regression on the raw target.
rg = Ridge(alpha=50)
rg.fit(X_train, y_train)
print(rg.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(rg.score(X_test, y_test))
print(rg.score(X_train, y_train))
[[128224.40637062]]
    SalePrice
99     128950
0.8519237019479504
0.8260130780293145
# LightGBM with default hyperparameters.
lgbm = LGBMRegressor()
lgbm.fit(X_train, y_train)
print(lgbm.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(lgbm.score(X_test, y_test))
print(lgbm.score(X_train, y_train))
[122699.63086328]
    SalePrice
99     128950
0.9084717579343212
0.9805903170794543
# Stack Lasso/XGB/LGBM with a Ridge meta-learner (mlxtend StackingRegressor).
stack = regressor.StackingRegressor(regressors=[ls, xgb, lgbm], meta_regressor=rg)
stack.fit(X_train, y_train)
print(stack.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(stack.score(X_test, y_test))
print(stack.score(X_train, y_train))
[[137983.58172137]]
    SalePrice
99     128950
0.9008161248536086
0.9998717981042333
# Re-split with a log1p-transformed target: house prices are right-skewed,
# so linear models usually do better on log(price).
X_train, X_test, y_train, y_test = train_test_split(train_last.iloc[:, 0:-1], np.log1p(train_last.iloc[:, [-1]]), test_size=0.2, random_state=7)
lr = LinearRegression()
lr.fit(X_train, y_train)
print(lr.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(lr.score(X_test, y_test))
print(lr.score(X_train, y_train))
# RMSE reported back on the original dollar scale via expm1.
print(np.sqrt(mean_squared_error(np.expm1(y_test), np.expm1(lr.predict(X_test)))))
# Leave-one-out CV timing experiment, left disabled (cv=len(X_train) is slow).
# time_1 = time.time()
# error_list = np.sqrt(-cross_val_score(lr, X_train, y_train, scoring='neg_mean_squared_error', cv=len(X_train)))
# print(error_list)
# time_2 = time.time()
# print(time_2-time_1)
# NOTE(review): sqrt of R^2 produces NaN for any fold scoring below zero.
print(np.sqrt(cross_val_score(lr, X_train, y_train, scoring='r2', cv=5)))
[[11.6323596]]
    SalePrice
99  11.767188
0.9014287370300434
0.8814179338517563
27407.459338824414
[0.94578428 0.76389078 0.93094413 0.94086894 0.95391384]
# Lasso on the log target; alpha shrinks to 0.002 to match the log scale.
ls = Lasso(alpha=0.002, max_iter=100000)
ls.fit(X_train, y_train)
print(ls.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(ls.score(X_test, y_test))
print(ls.score(X_train, y_train))
print(np.sqrt(mean_squared_error(np.expm1(y_test), np.expm1(ls.predict(X_test)))))
# Timed full leave-one-out CV run.
# NOTE(review): this scores `lr`, not `ls` — looks like a copy-paste slip
# from the previous cell; confirm intent before reusing.
time_1 = time.time()
error_list = np.sqrt(-cross_val_score(lr, X_train, y_train, scoring='neg_mean_squared_error', cv=len(X_train)))
print(error_list)
time_2 = time.time()
print(time_2-time_1)
print(np.sqrt(cross_val_score(ls, X_train, y_train, scoring='r2', cv=5)))
[11.6227277]
    SalePrice
99  11.767188
0.9023902700479108
0.8795640077417723
27517.076624711135
[0.07372533 0.05254256 0.06465484 ... 0.09983587 0.04480408 0.20046456]
12.748914957046509
[0.94626087 0.75131723 0.92972572 0.94094776 0.95548991]
# Ridge on the log target, with 5-fold RMSE and sqrt-R^2 cross-validation.
rg = Ridge(alpha=1)
rg.fit(X_train, y_train)
print(rg.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(rg.score(X_test, y_test))
print(rg.score(X_train, y_train))
print(np.sqrt(mean_squared_error(np.expm1(y_test), np.expm1(rg.predict(X_test)))))
print(np.sqrt(-cross_val_score(rg, X_train, y_train, scoring='neg_mean_squared_error', cv=5)))
print(np.sqrt(cross_val_score(rg, X_train, y_train, scoring='r2', cv=5)))
[[11.63177696]]
    SalePrice
99  11.767188
0.9015510582176212
0.8814164537644407
27409.814767861015
[0.12599009 0.24197056 0.14567151 0.12624434 0.13110084]
[0.9457901  0.76392329 0.93089761 0.9408726  0.9540249 ]
# xgb = XGBRegressor()
# Hand-tuned XGBoost: slower learning rate, shallow trees, more boosting rounds.
xgb = XGBRegressor(learning_rate=0.02, max_depth=3, n_estimators=1000)
xgb.fit(X_train, y_train)
print(xgb.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(xgb.score(X_test, y_test))
print(xgb.score(X_train, y_train))
# Dollar-scale RMSE on both test and train splits (gap shows overfit degree).
print(np.sqrt(mean_squared_error(np.expm1(y_test), np.expm1(xgb.predict(X_test)))))
print(np.sqrt(mean_squared_error(np.expm1(y_train), np.expm1(xgb.predict(X_train)))))
print(np.sqrt(-cross_val_score(xgb, X_train, y_train, scoring='neg_mean_squared_error', cv=5)))
print(np.sqrt(cross_val_score(xgb, X_train, y_train, scoring='r2', cv=5)))
[11.665708]
    SalePrice
99  11.767188
0.9192435376855344
0.9698160291558459
25198.975648759053
12138.937999540736
[0.12729575 0.15484294 0.14307831 0.1093134  0.11721916]
[0.94462762 0.91075464 0.93342307 0.95601169 0.96342153]
# Hand-tuned LightGBM: few leaves, slow learning rate, many rounds.
lgbm = LGBMRegressor(num_leaves=7, learning_rate=0.01, n_estimators=1000)
lgbm.fit(X_train, y_train)
print(lgbm.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(lgbm.score(X_test, y_test))
print(lgbm.score(X_train, y_train))
print(np.sqrt(mean_squared_error(np.expm1(y_test), np.expm1(lgbm.predict(X_test)))))
print(np.sqrt(-cross_val_score(lgbm, X_train, y_train, scoring='neg_mean_squared_error', cv=5)))
print(np.sqrt(cross_val_score(lgbm, X_train, y_train, scoring='r2', cv=5)))
[11.67980588]
    SalePrice
99  11.767188
0.9123717713040634
0.9507868084209785
27571.85575283145
[0.12858805 0.15503547 0.14073726 0.11536931 0.12916114]
[0.94346381 0.91052166 0.9356583  0.95087378 0.95540682]
# Same Lasso/XGB/LGBM stack with Ridge meta-learner, now on the log target.
stack = regressor.StackingRegressor(regressors=[ls, xgb, lgbm], meta_regressor=rg)
stack.fit(X_train, y_train)
print(stack.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(stack.score(X_test, y_test))
print(stack.score(X_train, y_train))
print(np.sqrt(-cross_val_score(stack, X_train, y_train, scoring='neg_mean_squared_error', cv=5)))
print(np.sqrt(cross_val_score(stack, X_train, y_train, scoring='r2', cv=5)))
[[11.67493043]]
    SalePrice
99  11.767188
0.9161487095970527
0.9711736239829509
[0.12989797 0.14590182 0.14341223 0.10997372 0.12084481]
[0.94227063 0.92119425 0.93310079 0.95546623 0.96107729]
# Peek at the training features after the log-target split.
X_train
MoSoldMSZoningBsmtFinSF1GarageCondMSSubClassLotAreaWoodDeckSFGarageFinishSaleConditionSaleTypeYearBuiltGarageAreaBedroomAbvGrFoundationCentralAirBsmtFinType1Exterior1stFullBathGarageTypeGarageQualExterQualFireplacesKitchenQual1stFlrSFBsmtCondGrLivAreaHeatingQCHouseStyleLotFrontageBsmtExposureOpenPorchSFBsmtQualLotShapeMasVnrTypeOverallQualMasVnrAreaTotalBsmtSFYrSoldBsmtUnfSFNeighborhood2ndFlrSFExterior2ndTotRmsAbvGrdScreenPorchGarageCarsFireplaceQuGarageYrBltOverallCondYearRemodAdd
108714051410574160244200510433515112532121082319534685.025030170.0108221082198711290342005.072006
18773026104100144191618032158222101808316564260.01010140.066026605704980111916.061987
67182551596629147144192530031025223101697313692654.01020150.067221214672460111930.041950
652830514875002441996512251511253212909317164670.0112030160.08802880168071270231996.071996
126661053101200144191030842055123011964318892360.01021160.092549255925490141960.021950
......................................................................................................................................................
21153365111042010024420094603516112531021212312124583.022230150.0121211176401260212009.072009
50203698211917001441965461221441221011214312144570.01020140.01214442040560211965.061965
53703600511127352161431972576321241231018643864250.01021130.08640264100550211980.071972
12201033125117800011419642882211515310191239122566.01021140.091230100450111964.071964
175234775111261501441950576421462531122158421582584.032920150.01202472540870241950.062001

1168 rows × 49 columns

def cross_test_score(X_train, y_train):
    """Leave-one-out error screen used to hunt outliers.

    For each row i, fit a LinearRegression on every other row and record
    the root-squared error of the prediction for row i.  Rows whose error
    dwarfs the rest are likely outliers (see the sorted plot below).

    Parameters
    ----------
    X_train, y_train : DataFrames positionally indexed 0..n-1
        (the caller resets .index before invoking).

    Returns
    -------
    list of float
        One leave-one-out error per row, in row order.
    """
    error_list = []
    lr = LinearRegression()
    n_rows = len(X_train)
    for index in range(n_rows):
        # Train on every row except `index`.  The original special-cased
        # index 0 and index n-1, and the last-row branch fitted on
        # iloc[0:cv, :], i.e. it leaked the held-out last row into its own
        # training fold, understating that row's error.  The generic
        # drop-one-position path below is correct for every index, so the
        # special cases are removed.
        keep = list(range(n_rows))
        keep.pop(index)
        lr.fit(X_train.iloc[keep, :], y_train.iloc[keep, :])
        y_pred = lr.predict(X_train.iloc[[index], :])
        error_list.append(np.sqrt(mean_squared_error(y_train.iloc[[index], :], y_pred)))
    return error_list
# Reset to positional indices so cross_test_score can address rows 0..n-1,
# compute per-row leave-one-out errors, and plot them.
X_train.index = range(X_train.shape[0])
y_train.index = range(y_train.shape[0])
mean_list = cross_test_score(X_train, y_train)
plt.plot(range(len(mean_list)), mean_list)
[<matplotlib.lines.Line2D at 0x16af3208d68>]

[Figure output_62_1.png: line plot of per-row leave-one-out errors — external image failed to load (source site likely blocks hotlinking; re-upload the saved image). (img-R3M1If7S-1615021016040)]

# Sort per-row leave-one-out errors ascending; the tail exposes outliers.
a = pd.Series(mean_list, index=range(len(mean_list))).sort_values()
a
212     0.000084
816     0.000101
312     0.000261
983     0.000311
111     0.000443
          ...   
1061    0.695339
515     0.750419
437     0.761170
342     1.269573
464     2.359217
Length: 1168, dtype: float64
# Inspect the two rows with by far the largest leave-one-out errors.
X_train.iloc[[342, 464], :]
MoSoldMSZoningBsmtFinSF1GarageCondMSSubClassLotAreaWoodDeckSFGarageFinishSaleConditionSaleTypeYearBuiltGarageAreaBedroomAbvGrFoundationCentralAirBsmtFinType1Exterior1stFullBathGarageTypeGarageQualExterQualFireplacesKitchenQual1stFlrSFBsmtCondGrLivAreaHeatingQCHouseStyleLotFrontageBsmtExposureOpenPorchSFBsmtQualLotShapeMasVnrTypeOverallQualMasVnrAreaTotalBsmtSFYrSoldBsmtUnfSFNeighborhood2ndFlrSFExterior2ndTotRmsAbvGrdScreenPorchGarageCarsFireplaceQuGarageYrBltOverallCondYearRemodAdd
34243226051440094208358200788435161236331331383467646130.044064149762.0313848784153813110342007.072008
46463564451463887214358200814183516725333346923564246313.042924249796.06110046649506120242008.072008
# Drop the two outlier rows found above, then refit Ridge on the cleaned set.
X_train.drop([342, 464], axis=0, inplace=True)
y_train.drop([342, 464], axis=0, inplace=True)
rg = Ridge(alpha=1)
rg.fit(X_train, y_train)
print(rg.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(rg.score(X_test, y_test))
print(rg.score(X_train, y_train))
print(np.sqrt(mean_squared_error(np.expm1(y_test), np.expm1(rg.predict(X_test)))))
print(np.sqrt(-cross_val_score(rg, X_train, y_train, scoring='neg_mean_squared_error', cv=5)))
print(np.sqrt(cross_val_score(rg, X_train, y_train, scoring='r2', cv=5)))    
# Best result so far; the remaining cells are further attempts, which
# turn out not to beat it.
[[11.64851484]]
    SalePrice
99  11.767188
0.9151227578383072
0.9144284746375823
21680.19889202489
[0.11325024 0.12770313 0.13376579 0.11558958 0.12319022]
[0.95643737 0.94056018 0.94224296 0.95068114 0.95951961]
# Lasso refit on the outlier-cleaned data.
ls = Lasso(alpha=0.002, max_iter=100000)
ls.fit(X_train, y_train)
print(ls.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(ls.score(X_test, y_test))
print(ls.score(X_train, y_train))
print(np.sqrt(mean_squared_error(np.expm1(y_test), np.expm1(ls.predict(X_test)))))
# NOTE(review): as in the earlier Lasso cell, this leave-one-out run scores
# `lr`, not `ls` — likely a copy-paste slip; confirm intent.
time_1 = time.time()
error_list = np.sqrt(-cross_val_score(lr, X_train, y_train, scoring='neg_mean_squared_error', cv=len(X_train)))
print(error_list)
time_2 = time.time()
print(time_2-time_1)
print(np.sqrt(cross_val_score(ls, X_train, y_train, scoring='r2', cv=5)))
[11.63326364]
    SalePrice
99  11.767188
0.9143217824770862
0.912713281276043
22220.831580976675
[0.04530086 0.04875491 0.07250084 ... 0.06706882 0.04069486 0.16519963]
12.84067153930664
[0.95639987 0.94282027 0.94192452 0.9501967  0.9602258 ]
# xgb = XGBRegressor()
# Tuned XGBoost refit on the outlier-cleaned data.
xgb = XGBRegressor(learning_rate=0.02, max_depth=3, n_estimators=1000)
xgb.fit(X_train, y_train)
print(xgb.predict(X_test.iloc[[142], :]))
print(y_test.iloc[[142], :])
print(xgb.score(X_test, y_test))
print(xgb.score(X_train, y_train))
# Dollar-scale RMSE on both splits.
print(np.sqrt(mean_squared_error(np.expm1(y_test), np.expm1(xgb.predict(X_test)))))
print(np.sqrt(mean_squared_error(np.expm1(y_train), np.expm1(xgb.predict(X_train)))))
print(np.sqrt(-cross_val_score(xgb, X_train, y_train, scoring='neg_mean_squared_error', cv=5)))
print(np.sqrt(cross_val_score(xgb, X_train, y_train, scoring='r2', cv=5)))
[11.663737]
    SalePrice
99  11.767188
0.92229859972357
0.9710584050678127
23644.2496823488
11613.610958582125
[0.12072391 0.1213991  0.13657591 0.10816418 0.11720516]
[0.95034336 0.94644622 0.93971222 0.95695243 0.96343044]
# Rebuild a single frame (features + log target) after outlier removal.
train_concat = pd.concat([X_train, y_train], axis=1)
train_concat
MoSoldMSZoningBsmtFinSF1GarageCondMSSubClassLotAreaWoodDeckSFGarageFinishSaleConditionSaleTypeYearBuiltGarageAreaBedroomAbvGrFoundationCentralAirBsmtFinType1Exterior1stFullBathGarageTypeGarageQualExterQualFireplacesKitchenQual1stFlrSFBsmtCondGrLivAreaHeatingQCHouseStyleLotFrontageBsmtExposureOpenPorchSFBsmtQualLotShapeMasVnrTypeOverallQualMasVnrAreaTotalBsmtSFYrSoldBsmtUnfSFNeighborhood2ndFlrSFExterior2ndTotRmsAbvGrdScreenPorchGarageCarsFireplaceQuGarageYrBltOverallCondYearRemodAddSalePrice
014051410574160244200510433515112532121082319534685.025030170.0108221082198711290342005.07200612.437188
173026104100144191618032158222101808316564260.01010140.066026605704980111916.06198711.813037
282551596629147144192530031025223101697313692654.01020150.067221214672460111930.04195011.548302
3830514875002441996512251511253212909317164670.0112030160.08802880168071270231996.07199612.160034
461053101200144191030842055123011964318892360.01021160.092549255925490141960.02195011.711785
.........................................................................................................................................................
116353365111042010024420094603516112531021212312124583.022230150.0121211176401260212009.07200912.133507
116403698211917001441965461221441221011214312144570.01020140.01214442040560211965.06196511.849405
116503600511127352161431972576321241231018643864250.01021130.08640264100550211980.07197211.619544
11661033125117800011419642882211515310191239122566.01021140.091230100450111964.07196411.652696
1167234775111261501441950576421462531122158421582584.032920150.01202472540870241950.06200112.400821

1166 rows × 50 columns

# Correlation of every feature with the (log) SalePrice, strongest first.
corr_last_list = train_concat.corr().iloc[:, -1].sort_values(ascending=False)
corr_last_list
SalePrice        1.000000
OverallQual      0.818545
Neighborhood     0.743155
GrLivArea        0.724649
GarageCars       0.687718
ExterQual        0.679473
KitchenQual      0.664916
GarageArea       0.660270
BsmtQual         0.637945
TotalBsmtSF      0.633197
1stFlrSF         0.605800
FullBath         0.602931
GarageFinish     0.599941
YearBuilt        0.586915
GarageType       0.580333
YearRemodAdd     0.569444
MSSubClass       0.553671
TotRmsAbvGrd     0.545820
Foundation       0.542097
FireplaceQu      0.537695
Fireplaces       0.478249
HeatingQC        0.469827
MasVnrType       0.441539
MasVnrArea       0.414464
MSZoning         0.405073
Exterior1st      0.404112
GarageCond       0.391892
Exterior2nd      0.391754
OverallCond      0.389876
BsmtFinType1     0.381671
BsmtFinSF1       0.375696
GarageQual       0.372400
CentralAir       0.371996
GarageYrBlt      0.368714
WoodDeckSF       0.342925
BsmtExposure     0.334175
2ndFlrSF         0.332800
SaleType         0.324187
OpenPorchSF      0.308198
BsmtCond         0.292069
LotArea          0.291945
HouseStyle       0.291766
LotShape         0.287286
SaleCondition    0.264004
BedroomAbvGr     0.236127
BsmtUnfSF        0.230587
LotFrontage      0.177707
ScreenPorch      0.098918
MoSold           0.075277
YrSold           0.048146
Name: SalePrice, dtype: float64
# Degree-2 polynomial feature expansion: 49 columns blow up to 1275
# (bias + linear + all pairwise interaction/square terms), per the dump below.
pf = PolynomialFeatures()
pd.DataFrame(pf.fit_transform(X_train))
0123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249...1025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274
01.01.04.00.05.014.010574.0160.02.04.04.02005.01043.03.05.01.05.011.02.05.03.02.01.02.01082.03.01953.04.06.085.02.050.03.00.01.07.00.01082.02.01082.019.0871.012.09.00.03.04.02005.07.02006.01.04.00.05.014.010574.0160.02.04.04.02005.01043.03.05.01.05.011.02.05.03.02.01.02.01082.03.01953.04.06.085.02.050.03.00.01.07.00.01082.02.01082.019.0871.012.09.00.03.04.02005.07.02006.016.00.020.056.042296.0640.08.016.016.08020.04172.012.020.04.020.044.08.020.012.08.04.08.04328.012.07812.016.024.0340.08.0200.012.00.04.028.00.04328.08.04328.076.03484.048.036.00.012.016.08020.028.08024.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.025.070.052870.0800.010.020.020.010025.05215.015.025.05.025.055.010.025.015.010.05.010.05410.015.09765.020.030.0425.010.0250.015.00.05.035.00.05410.010.05410.095.04355.060.045.00.015.020.010025.035.010030.0196.0148036.02240.028.056.056.028070.014602.042.070.0...300.018.00.06.042.00.06492.012.06492.0114.05226.072.054.00.018.024.012030.042.012036.07225.0170.04250.0255.00.085.0595.00.091970.0170.091970.01615.074035.01020.0765.00.0255.0340.0170425.0595.0170510.04.0100.06.00.02.014.00.02164.04.02164.038.01742.024.018.00.06.08.04010.014.04012.02500.0150.00.050.0350.00.054100.0100.054100.0950.043550.0600.0450.00.0150.0200.0100250.0350.0100300.09.00.03.021.00.03246.06.03246.057.02613.036.027.00.09.012.06015.021.06018.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.07.00.01082.02.01082.019.0871.012.09.00.03.04.02005.07.02006.049.00.07574.014.07574.0133.06097.084.063.00.021.028.014035.049.014042.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01170724.02164.01170724.020558.0942422.012984.09738.00.03246.04328.02169410.07574.02170492.04.02164.038.01742.024.018.00.06.08.04010.014.04012.01170724.020558.0942422.012984.09738.00.03246.04328.02169410.07574.02170492.0361.016549.0228.0171.00.057.076.038095.0133.038114.0758641.010452.07839.00.02613.03484.01746355.06097.01747226.0144.
0108.00.036.048.024060.084.024072.081.00.027.036.018045.063.018054.00.00.00.00.00.00.09.012.06015.021.06018.016.08020.028.08024.04020025.014035.04022030.049.014042.04024036.0
11.07.03.00.02.06.010410.00.01.04.04.01916.0180.03.02.01.05.08.02.02.02.01.00.01.0808.03.01656.04.02.060.01.00.01.00.01.04.00.0660.02.0660.05.0704.09.08.00.01.01.01916.06.01987.049.021.00.014.042.072870.00.07.028.028.013412.01260.021.014.07.035.056.014.014.014.07.00.07.05656.021.011592.028.014.0420.07.00.07.00.07.028.00.04620.014.04620.035.04928.063.056.00.07.07.013412.042.013909.09.00.06.018.031230.00.03.012.012.05748.0540.09.06.03.015.024.06.06.06.03.00.03.02424.09.04968.012.06.0180.03.00.03.00.03.012.00.01980.06.01980.015.02112.027.024.00.03.03.05748.018.05961.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.04.012.020820.00.02.08.08.03832.0360.06.04.02.010.016.04.04.04.02.00.02.01616.06.03312.08.04.0120.02.00.02.00.02.08.00.01320.04.01320.010.01408.018.016.00.02.02.03832.012.03974.036.062460.00.06.024.024.011496.01080.018.012.0...0.02.00.02.08.00.01320.04.01320.010.01408.018.016.00.02.02.03832.012.03974.03600.060.00.060.00.060.0240.00.039600.0120.039600.0300.042240.0540.0480.00.060.060.0114960.0360.0119220.01.00.01.00.01.04.00.0660.02.0660.05.0704.09.08.00.01.01.01916.06.01987.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.01.04.00.0660.02.0660.05.0704.09.08.00.01.01.01916.06.01987.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.04.00.0660.02.0660.05.0704.09.08.00.01.01.01916.06.01987.016.00.02640.08.02640.020.02816.036.032.00.04.04.07664.024.07948.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0435600.01320.0435600.03300.0464640.05940.05280.00.0660.0660.01264560.03960.01311420.04.01320.010.01408.018.016.00.02.02.03832.012.03974.0435600.03300.0464640.05940.05280.00.0660.0660.01264560.03960.01311420.025.03520.045.040.00.05.05.09580.030.09935.0495616.06336.05632.00.0704.0704.01348864.04224.01398848.081.072.00.09.09.017244.054.017883.064.00.08.08.015328.048.015896.00.00.00.00.00.00.01.01.01916.06.01987.01.01916.06.01987.03671056.011496.03807092.036.011922.03
948169.0
21.08.02.0551.05.09.06629.0147.01.04.04.01925.0300.03.01.00.02.05.02.02.03.01.00.01.0697.03.01369.02.06.054.01.00.02.00.01.05.00.0672.02.0121.04.0672.04.06.00.01.01.01930.04.01950.064.016.04408.040.072.053032.01176.08.032.032.015400.02400.024.08.00.016.040.016.016.024.08.00.08.05576.024.010952.016.048.0432.08.00.016.00.08.040.00.05376.016.0968.032.05376.032.048.00.08.08.015440.032.015600.04.01102.010.018.013258.0294.02.08.08.03850.0600.06.02.00.04.010.04.04.06.02.00.02.01394.06.02738.04.012.0108.02.00.04.00.02.010.00.01344.04.0242.08.01344.08.012.00.02.02.03860.08.03900.0303601.02755.04959.03652579.080997.0551.02204.02204.01060675.0165300.01653.0551.00.01102.02755.01102.01102.01653.0551.00.0551.0384047.01653.0754319.01102.03306.029754.0551.00.01102.00.0551.02755.00.0370272.01102.066671.02204.0370272.02204.03306.00.0551.0551.01063430.02204.01074450.025.045.033145.0735.05.020.020.09625.01500.015.05.00.010.025.010.010.015.05.00.05.03485.015.06845.010.030.0270.05.00.010.00.05.025.00.03360.010.0605.020.03360.020.030.00.05.05.09650.020.09750.081.059661.01323.09.036.036.017325.02700.027.09.0...0.012.00.06.030.00.04032.012.0726.024.04032.024.036.00.06.06.011580.024.011700.02916.054.00.0108.00.054.0270.00.036288.0108.06534.0216.036288.0216.0324.00.054.054.0104220.0216.0105300.01.00.02.00.01.05.00.0672.02.0121.04.0672.04.06.00.01.01.01930.04.01950.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.04.00.02.010.00.01344.04.0242.08.01344.08.012.00.02.02.03860.08.03900.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.05.00.0672.02.0121.04.0672.04.06.00.01.01.01930.04.01950.025.00.03360.010.0605.020.03360.020.030.00.05.05.09650.020.09750.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0451584.01344.081312.02688.0451584.02688.04032.00.0672.0672.01296960.02688.01310400.04.0242.08.01344.08.012.00.02.02.03860.08.03900.014641.0484.081312.0484.0726.00.0121.0121.0233530.0484.0235950.016.02688.016.024.00.04.04.07720.016.07800.0451584.02688.04032.00.0672.0672.01296960.02688.01310400.
016.024.00.04.04.07720.016.07800.036.00.06.06.011580.024.011700.00.00.00.00.00.00.01.01.01930.04.01950.01.01930.04.01950.03724900.07720.03763500.016.07800.03802500.0
31.08.03.00.05.014.08750.00.02.04.04.01996.0512.02.05.01.05.011.02.05.03.02.01.02.0909.03.01716.04.06.070.01.0120.03.00.01.06.00.0880.02.0880.016.0807.012.07.00.02.03.01996.07.01996.064.024.00.040.0112.070000.00.016.032.032.015968.04096.016.040.08.040.088.016.040.024.016.08.016.07272.024.013728.032.048.0560.08.0960.024.00.08.048.00.07040.016.07040.0128.06456.096.056.00.016.024.015968.056.015968.09.00.015.042.026250.00.06.012.012.05988.01536.06.015.03.015.033.06.015.09.06.03.06.02727.09.05148.012.018.0210.03.0360.09.00.03.018.00.02640.06.02640.048.02421.036.021.00.06.09.05988.021.05988.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.025.070.043750.00.010.020.020.09980.02560.010.025.05.025.055.010.025.015.010.05.010.04545.015.08580.020.030.0350.05.0600.015.00.05.030.00.04400.010.04400.080.04035.060.035.00.010.015.09980.035.09980.0196.0122500.00.028.056.056.027944.07168.028.070.0...720.018.00.06.036.00.05280.012.05280.096.04842.072.042.00.012.018.011976.042.011976.04900.070.08400.0210.00.070.0420.00.061600.0140.061600.01120.056490.0840.0490.00.0140.0210.0139720.0490.0139720.01.0120.03.00.01.06.00.0880.02.0880.016.0807.012.07.00.02.03.01996.07.01996.014400.0360.00.0120.0720.00.0105600.0240.0105600.01920.096840.01440.0840.00.0240.0360.0239520.0840.0239520.09.00.03.018.00.02640.06.02640.048.02421.036.021.00.06.09.05988.021.05988.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.06.00.0880.02.0880.016.0807.012.07.00.02.03.01996.07.01996.036.00.05280.012.05280.096.04842.072.042.00.012.018.011976.042.011976.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0774400.01760.0774400.014080.0710160.010560.06160.00.01760.02640.01756480.06160.01756480.04.01760.032.01614.024.014.00.04.06.03992.014.03992.0774400.014080.0710160.010560.06160.00.01760.02640.01756480.06160.01756480.0256.012912.0192.0112.00.032.048.031936.0112.031936.0651249.09684.05649.00.01614.02421.01610772.05649.01610772.0144.084.0
0.024.036.023952.084.023952.049.00.014.021.013972.049.013972.00.00.00.00.00.00.04.06.03992.014.03992.09.05988.021.05988.03984016.013972.03984016.049.013972.03984016.0
41.06.01.00.05.03.010120.00.01.04.04.01910.0308.04.02.00.05.05.01.02.03.00.01.01.0964.03.01889.02.03.060.01.00.02.01.01.06.00.0925.04.0925.05.0925.04.09.00.01.04.01960.02.01950.036.06.00.030.018.060720.00.06.024.024.011460.01848.024.012.00.030.030.06.012.018.00.06.06.05784.018.011334.012.018.0360.06.00.012.06.06.036.00.05550.024.05550.030.05550.024.054.00.06.024.011760.012.011700.01.00.05.03.010120.00.01.04.04.01910.0308.04.02.00.05.05.01.02.03.00.01.01.0964.03.01889.02.03.060.01.00.02.01.01.06.00.0925.04.0925.05.0925.04.09.00.01.04.01960.02.01950.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.025.015.050600.00.05.020.020.09550.01540.020.010.00.025.025.05.010.015.00.05.05.04820.015.09445.010.015.0300.05.00.010.05.05.030.00.04625.020.04625.025.04625.020.045.00.05.020.09800.010.09750.09.030360.00.03.012.012.05730.0924.012.06.0...0.06.03.03.018.00.02775.012.02775.015.02775.012.027.00.03.012.05880.06.05850.03600.060.00.0120.060.060.0360.00.055500.0240.055500.0300.055500.0240.0540.00.060.0240.0117600.0120.0117000.01.00.02.01.01.06.00.0925.04.0925.05.0925.04.09.00.01.04.01960.02.01950.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.04.02.02.012.00.01850.08.01850.010.01850.08.018.00.02.08.03920.04.03900.01.01.06.00.0925.04.0925.05.0925.04.09.00.01.04.01960.02.01950.01.06.00.0925.04.0925.05.0925.04.09.00.01.04.01960.02.01950.036.00.05550.024.05550.030.05550.024.054.00.06.024.011760.012.011700.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0855625.03700.0855625.04625.0855625.03700.08325.00.0925.03700.01813000.01850.01803750.016.03700.020.03700.016.036.00.04.016.07840.08.07800.0855625.04625.0855625.03700.08325.00.0925.03700.01813000.01850.01803750.025.04625.020.045.00.05.020.09800.010.09750.0855625.03700.08325.00.0925.03700.01813000.01850.01803750.016.036.00.04.016.07840.08.07800.081.00.09.036.017640.018.017550.00.00.00.00.00.00.01.04.01960.02.01950.016.07840.08.07800.03841600.03
920.03822000.04.03900.03802500.0
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
11611.05.03.036.05.011.010420.0100.02.04.04.02009.0460.03.05.01.06.011.02.05.03.01.00.02.01212.03.01212.04.05.083.02.022.03.00.01.05.00.01212.01.01176.04.00.012.06.00.02.01.02009.07.02009.025.015.0180.025.055.052100.0500.010.020.020.010045.02300.015.025.05.030.055.010.025.015.05.00.010.06060.015.06060.020.025.0415.010.0110.015.00.05.025.00.06060.05.05880.020.00.060.030.00.010.05.010045.035.010045.09.0108.015.033.031260.0300.06.012.012.06027.01380.09.015.03.018.033.06.015.09.03.00.06.03636.09.03636.012.015.0249.06.066.09.00.03.015.00.03636.03.03528.012.00.036.018.00.06.03.06027.021.06027.01296.0180.0396.0375120.03600.072.0144.0144.072324.016560.0108.0180.036.0216.0396.072.0180.0108.036.00.072.043632.0108.043632.0144.0180.02988.072.0792.0108.00.036.0180.00.043632.036.042336.0144.00.0432.0216.00.072.036.072324.0252.072324.025.055.052100.0500.010.020.020.010045.02300.015.025.05.030.055.010.025.015.05.00.010.06060.015.06060.020.025.0415.010.0110.015.00.05.025.00.06060.05.05880.020.00.060.030.00.010.05.010045.035.010045.0121.0114620.01100.022.044.044.022099.05060.033.055.0...110.015.00.05.025.00.06060.05.05880.020.00.060.030.00.010.05.010045.035.010045.06889.0166.01826.0249.00.083.0415.00.0100596.083.097608.0332.00.0996.0498.00.0166.083.0166747.0581.0166747.04.044.06.00.02.010.00.02424.02.02352.08.00.024.012.00.04.02.04018.014.04018.0484.066.00.022.0110.00.026664.022.025872.088.00.0264.0132.00.044.022.044198.0154.044198.09.00.03.015.00.03636.03.03528.012.00.036.018.00.06.03.06027.021.06027.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.05.00.01212.01.01176.04.00.012.06.00.02.01.02009.07.02009.025.00.06060.05.05880.020.00.060.030.00.010.05.010045.035.010045.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01468944.01212.01425312.04848.00.014544.07272.00.02424.01212.02434908.08484.02434908.01.01176.04.00.012.06.00.02.01.02009.07.02009.01382976.04704.00.014112.07056.00.02352.01176.02362584.08232.02362584.016.00.048.024.00.08.04.08036.028.08036.00.00.00.00.00.00.00.00.00.0144
.072.00.024.012.024108.084.024108.036.00.012.06.012054.042.012054.00.00.00.00.00.00.04.02.04018.014.04018.01.02009.07.02009.04036081.014063.04036081.049.014063.04036081.0
11621.00.03.0698.02.011.09170.00.01.04.04.01965.0461.02.02.01.04.04.01.02.02.01.00.01.01214.03.01214.04.05.070.01.00.02.00.01.04.00.01214.04.0420.04.00.05.06.00.02.01.01965.06.01965.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.09.02094.06.033.027510.00.03.012.012.05895.01383.06.06.03.012.012.03.06.06.03.00.03.03642.09.03642.012.015.0210.03.00.06.00.03.012.00.03642.012.01260.012.00.015.018.00.06.03.05895.018.05895.0487204.01396.07678.06400660.00.0698.02792.02792.01371570.0321778.01396.01396.0698.02792.02792.0698.01396.01396.0698.00.0698.0847372.02094.0847372.02792.03490.048860.0698.00.01396.00.0698.02792.00.0847372.02792.0293160.02792.00.03490.04188.00.01396.0698.01371570.04188.01371570.04.022.018340.00.02.08.08.03930.0922.04.04.02.08.08.02.04.04.02.00.02.02428.06.02428.08.010.0140.02.00.04.00.02.08.00.02428.08.0840.08.00.010.012.00.04.02.03930.012.03930.0121.0100870.00.011.044.044.021615.05071.022.022.0...0.010.00.05.020.00.06070.020.02100.020.00.025.030.00.010.05.09825.030.09825.04900.070.00.0140.00.070.0280.00.084980.0280.029400.0280.00.0350.0420.00.0140.070.0137550.0420.0137550.01.00.02.00.01.04.00.01214.04.0420.04.00.05.06.00.02.01.01965.06.01965.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.04.00.02.08.00.02428.08.0840.08.00.010.012.00.04.02.03930.012.03930.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.04.00.01214.04.0420.04.00.05.06.00.02.01.01965.06.01965.016.00.04856.016.01680.016.00.020.024.00.08.04.07860.024.07860.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01473796.04856.0509880.04856.00.06070.07284.00.02428.01214.02385510.07284.02385510.016.01680.016.00.020.024.00.08.04.07860.024.07860.0176400.01680.00.02100.02520.00.0840.0420.0825300.02520.0825300.016.00.020.024.00.08.04.07860.024.07860.00.00.00.00.00.00.00.00.00.025.030.00.010.05.09825.030.09825.036.00.012.06.011790.036.011790.00.00.00.00.00.00.04.02.03930.012.03930.01.01965.06.01965.0
3861225.011790.03861225.036.011790.03861225.0
11631.00.03.0600.05.011.012735.0216.01.04.03.01972.0576.03.02.01.02.04.01.02.03.01.00.01.0864.03.0864.02.05.00.01.00.02.01.01.03.00.0864.00.0264.010.00.05.05.00.02.01.01980.07.01972.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.09.01800.015.033.038205.0648.03.012.09.05916.01728.09.06.03.06.012.03.06.09.03.00.03.02592.09.02592.06.015.00.03.00.06.03.03.09.00.02592.00.0792.030.00.015.015.00.06.03.05940.021.05916.0360000.03000.06600.07641000.0129600.0600.02400.01800.01183200.0345600.01800.01200.0600.01200.02400.0600.01200.01800.0600.00.0600.0518400.01800.0518400.01200.03000.00.0600.00.01200.0600.0600.01800.00.0518400.00.0158400.06000.00.03000.03000.00.01200.0600.01188000.04200.01183200.025.055.063675.01080.05.020.015.09860.02880.015.010.05.010.020.05.010.015.05.00.05.04320.015.04320.010.025.00.05.00.010.05.05.015.00.04320.00.01320.050.00.025.025.00.010.05.09900.035.09860.0121.0140085.02376.011.044.033.021692.06336.033.022.0...0.010.05.05.015.00.04320.00.01320.050.00.025.025.00.010.05.09900.035.09860.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.02.01.01.03.00.0864.00.0264.010.00.05.05.00.02.01.01980.07.01972.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.04.02.02.06.00.01728.00.0528.020.00.010.010.00.04.02.03960.014.03944.01.01.03.00.0864.00.0264.010.00.05.05.00.02.01.01980.07.01972.01.03.00.0864.00.0264.010.00.05.05.00.02.01.01980.07.01972.09.00.02592.00.0792.030.00.015.015.00.06.03.05940.021.05916.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0746496.00.0228096.08640.00.04320.04320.00.01728.0864.01710720.06048.01703808.00.00.00.00.00.00.00.00.00.00.00.00.069696.02640.00.01320.01320.00.0528.0264.0522720.01848.0520608.0100.00.050.050.00.020.010.019800.070.019720.00.00.00.00.00.00.00.00.00.025.025.00.010.05.09900.035.09860.025.00.010.05.09900.035.09860.00.00.00.00.00.00.04.02.03960.014.03944.01.01980.07.01972.03920400.013860.03904560.049.01380
4.03888784.0
11641.010.03.0312.05.011.07800.00.01.01.04.01964.0288.02.02.01.01.05.01.05.03.01.00.01.0912.03.0912.02.05.066.01.00.02.01.01.04.00.0912.03.00.010.00.04.05.00.01.01.01964.07.01964.0100.030.03120.050.0110.078000.00.010.010.040.019640.02880.020.020.010.010.050.010.050.030.010.00.010.09120.030.09120.020.050.0660.010.00.020.010.010.040.00.09120.030.00.0100.00.040.050.00.010.010.019640.070.019640.09.0936.015.033.023400.00.03.03.012.05892.0864.06.06.03.03.015.03.015.09.03.00.03.02736.09.02736.06.015.0198.03.00.06.03.03.012.00.02736.09.00.030.00.012.015.00.03.03.05892.021.05892.097344.01560.03432.02433600.00.0312.0312.01248.0612768.089856.0624.0624.0312.0312.01560.0312.01560.0936.0312.00.0312.0284544.0936.0284544.0624.01560.020592.0312.00.0624.0312.0312.01248.00.0284544.0936.00.03120.00.01248.01560.00.0312.0312.0612768.02184.0612768.025.055.039000.00.05.05.020.09820.01440.010.010.05.05.025.05.025.015.05.00.05.04560.015.04560.010.025.0330.05.00.010.05.05.020.00.04560.015.00.050.00.020.025.00.05.05.09820.035.09820.0121.085800.00.011.011.044.021604.03168.022.022.0...0.010.05.05.020.00.04560.015.00.050.00.020.025.00.05.05.09820.035.09820.04356.066.00.0132.066.066.0264.00.060192.0198.00.0660.00.0264.0330.00.066.066.0129624.0462.0129624.01.00.02.01.01.04.00.0912.03.00.010.00.04.05.00.01.01.01964.07.01964.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.04.02.02.08.00.01824.06.00.020.00.08.010.00.02.02.03928.014.03928.01.01.04.00.0912.03.00.010.00.04.05.00.01.01.01964.07.01964.01.04.00.0912.03.00.010.00.04.05.00.01.01.01964.07.01964.016.00.03648.012.00.040.00.016.020.00.04.04.07856.028.07856.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0831744.02736.00.09120.00.03648.04560.00.0912.0912.01791168.06384.01791168.09.00.030.00.012.015.00.03.03.05892.021.05892.00.00.00.00.00.00.00.00.00.00.00.0100.00.040.050.00.010.010.019640.070.019640.00.00.00.00.00.00.00.00.00.016.020.00.04.04.07856.028.07856.025.00.05.05.09820.035.09820.00.00.00.00.00.00.01.01.01964.07.01964.01.01964.07.01964.0
3857296.013748.03857296.049.013748.03857296.0
11651.02.03.0477.05.011.012615.00.01.04.04.01950.0576.04.02.01.04.06.02.05.03.01.01.02.02158.04.02158.02.05.084.03.029.02.00.01.05.00.01202.04.0725.04.00.08.07.00.02.04.01950.06.02001.04.06.0954.010.022.025230.00.02.08.08.03900.01152.08.04.02.08.012.04.010.06.02.02.04.04316.08.04316.04.010.0168.06.058.04.00.02.010.00.02404.08.01450.08.00.016.014.00.04.08.03900.012.04002.09.01431.015.033.037845.00.03.012.012.05850.01728.012.06.03.012.018.06.015.09.03.03.06.06474.012.06474.06.015.0252.09.087.06.00.03.015.00.03606.012.02175.012.00.024.021.00.06.012.05850.018.06003.0227529.02385.05247.06017355.00.0477.01908.01908.0930150.0274752.01908.0954.0477.01908.02862.0954.02385.01431.0477.0477.0954.01029366.01908.01029366.0954.02385.040068.01431.013833.0954.00.0477.02385.00.0573354.01908.0345825.01908.00.03816.03339.00.0954.01908.0930150.02862.0954477.025.055.063075.00.05.020.020.09750.02880.020.010.05.020.030.010.025.015.05.05.010.010790.020.010790.010.025.0420.015.0145.010.00.05.025.00.06010.020.03625.020.00.040.035.00.010.020.09750.030.010005.0121.0138765.00.011.044.044.021450.06336.044.022.0...145.010.00.05.025.00.06010.020.03625.020.00.040.035.00.010.020.09750.030.010005.07056.0252.02436.0168.00.084.0420.00.0100968.0336.060900.0336.00.0672.0588.00.0168.0336.0163800.0504.0168084.09.087.06.00.03.015.00.03606.012.02175.012.00.024.021.00.06.012.05850.018.06003.0841.058.00.029.0145.00.034858.0116.021025.0116.00.0232.0203.00.058.0116.056550.0174.058029.04.00.02.010.00.02404.08.01450.08.00.016.014.00.04.08.03900.012.04002.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.05.00.01202.04.0725.04.00.08.07.00.02.04.01950.06.02001.025.00.06010.020.03625.020.00.040.035.00.010.020.09750.030.010005.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01444804.04808.0871450.04808.00.09616.08414.00.02404.04808.02343900.07212.02405202.016.02900.016.00.032.028.00.08.016.07800.024.08004.0525625.02900.00.05800.05075.00.01450.02900.01413750.04350.01450725.016.00.032.028.00.08.016.07800.024.08004.00.00.00
.00.00.00.00.00.00.064.056.00.016.032.015600.048.016008.049.00.014.028.013650.042.014007.00.00.00.00.00.00.04.08.03900.012.04002.016.07800.024.08004.03802500.011700.03901950.036.012006.04004001.0

1166 rows × 1275 columns

# Pairwise polynomial feature construction:
# for every unordered pair of base features, generate degree-2 polynomial terms
# with PolynomialFeatures and keep the single generated term whose correlation
# with the target beats the better of the two parents' correlations.
# NOTE(review): relies on X_train / y_train / corr_last_list defined earlier in
# this notebook; correlations are signed (not absolute), preserved as original.
# std = StandardScaler()
X_train_new = X_train.copy()
# X_train_new = pd.DataFrame(std.fit_transform(X_train_new), columns=X_train_new.columns)
pf = PolynomialFeatures(include_bias=False)
for index_1, col_1 in enumerate(X_train.columns):
    for index_2, col_2 in enumerate(X_train.columns):
        # visit each unordered pair exactly once
        if index_1 >= index_2:
            continue
        # baseline: the stronger of the two parent features' target correlations
        col_corr = max(corr_last_list[col_1], corr_last_list[col_2])
        pd_df = pd.concat(
            [pd.DataFrame(pf.fit_transform(X_train.loc[:, [col_1, col_2]])), y_train],
            axis=1,
        )
        # Correlation of every generated term (and the target itself, last row)
        # with the target.  Computed ONCE per pair instead of three times — this
        # sits inside an O(n^2) loop over feature pairs.
        target_corr = pd_df.corr().iloc[:, -1]
        # Count terms beating the baseline.  The target's self-correlation (1.0)
        # always counts, hence the "> 1" threshold below.  .get(True, 0) avoids
        # the KeyError the original a[True] raised when nothing beat the
        # baseline (e.g. NaN correlations from a constant generated column).
        n_better = (target_corr - col_corr > 0).value_counts().get(True, 0)
        if n_better > 1:
            # Keep the generated column with the highest target correlation,
            # excluding the target itself (last position).  argmax takes the
            # first position on ties, matching the original index selection.
            best_pos = int(target_corr.iloc[:-1].values.argmax())
            X_train_new[col_1 + 'PnFt' + col_2] = pd_df.iloc[:, best_pos]

X_train_new
MoSoldMSZoningBsmtFinSF1GarageCondMSSubClassLotAreaWoodDeckSFGarageFinishSaleConditionSaleTypeYearBuiltGarageAreaBedroomAbvGrFoundationCentralAirBsmtFinType1Exterior1stFullBathGarageTypeGarageQualExterQualFireplacesKitchenQual1stFlrSFBsmtCondGrLivAreaHeatingQCHouseStyleLotFrontageBsmtExposureOpenPorchSFBsmtQualLotShapeMasVnrTypeOverallQualMasVnrAreaTotalBsmtSFYrSoldBsmtUnfSFNeighborhood2ndFlrSFExterior2ndTotRmsAbvGrdScreenPorchGarageCarsFireplaceQuGarageYrBltOverallCondYearRemodAdd
014051410574160244200510433515112532121082319534685.025030170.0108221082198711290342005.072006
173026104100144191618032158222101808316564260.01010140.06602660570
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值