1. 函数库导入
import pandas as pd
import numpy as np
from sklearn. metrics import mean_squared_error
import lightgbm as lgb
import xgboost as xgb
from sklearn. model_selection import train_test_split
from sklearn. preprocessing import OneHotEncoder
from sklearn. model_selection import KFold, RepeatedKFold
from scipy import sparse
pd. set_option( 'display.max_columns' , None )
pd. set_option( 'display.max_rows' , None )
from datetime import datetime
2. 导入数据
# Load the abbreviated and complete train/test sets plus the sample
# submission frame used later for writing predictions.
# NOTE(review): the files appear to contain Chinese free-text columns;
# ISO-8859-1 decodes any byte sequence without error but likely mangles
# non-ASCII text — tolerable here since those columns are dropped or
# binarized downstream. Confirm the files' true encoding (often GBK).
train_abbr= pd. read_csv( "./data/happiness_train_abbr.csv" , encoding= 'ISO-8859-1' )
train= pd. read_csv( "./data/happiness_train_complete.csv" , encoding= 'ISO-8859-1' )
test_abbr= pd. read_csv( "./data/happiness_test_abbr.csv" , encoding= 'ISO-8859-1' )
test= pd. read_csv( "./data/happiness_test_complete.csv" , encoding= 'ISO-8859-1' )
test_sub= pd. read_csv( "./data/happiness_submit.csv" , encoding= 'ISO-8859-1' )
3. 查看数据
test. shape
(2968, 139)
test_sub. shape
(2968, 2)
train. shape
(8000, 140)
train. head( )
id happiness survey_type province city county survey_time gender birth nationality religion religion_freq edu edu_other edu_status edu_yr income political join_party floor_area property_0 property_1 property_2 property_3 property_4 property_5 property_6 property_7 property_8 property_other height_cm weight_jin health health_problem depression hukou hukou_loc media_1 media_2 media_3 media_4 media_5 media_6 leisure_1 leisure_2 leisure_3 leisure_4 leisure_5 leisure_6 leisure_7 leisure_8 leisure_9 leisure_10 leisure_11 leisure_12 socialize relax learn social_neighbor social_friend socia_outing equity class class_10_before class_10_after class_14 work_exper work_status work_yr work_type work_manage insur_1 insur_2 insur_3 insur_4 family_income family_m family_status house car invest_0 invest_1 invest_2 invest_3 invest_4 invest_5 invest_6 invest_7 invest_8 invest_other son daughter minor_child marital marital_1st s_birth marital_now s_edu s_political s_hukou s_income s_work_exper s_work_status s_work_type f_birth f_edu f_political f_work_14 m_birth m_edu m_political m_work_14 status_peer status_3_before view inc_ability inc_exp trust_1 trust_2 trust_3 trust_4 trust_5 trust_6 trust_7 trust_8 trust_9 trust_10 trust_11 trust_12 trust_13 neighbor_familiarity public_service_1 public_service_2 public_service_3 public_service_4 public_service_5 public_service_6 public_service_7 public_service_8 public_service_9 0 1 4 1 12 32 59 2015/8/4 14:18 1 1959 1 1 1 11 NaN 4.0 -2.0 20000 1 NaN 45.0 0 1 0 0 0 0 0 0 0 NaN 176 155 3 2 5 5 2.0 4 2 5 5 4 3 1 4 3 1 2 3 4 1 4 5 4 1 2 4 3 3.0 3.0 2 3 3 3 3 1 1 3.0 30.0 1.0 2.0 1 1 1 2 60000.0 2 2 1 2 0 1 0 0 0 0 0 0 0 NaN 1 0 0.0 3 1984.0 1958.0 1984.0 6.0 1.0 5.0 40000.0 5.0 NaN NaN -2 4 4 1 -2 4 1 1 3 2 4 3 50000.0 4 2 -8 -8 5 3 2 3 4 3 -8 4 1 4 50 60 50 50 30.0 30 50 50 50 1 2 4 2 18 52 85 2015/7/21 15:04 1 1992 1 1 1 12 NaN 4.0 2013.0 20000 1 NaN 110.0 0 0 0 0 1 0 0 0 0 NaN 170 110 5 4 3 1 1.0 2 2 1 3 5 1 2 3 4 3 5 4 3 2 3 4 5 1 2 4 3 6.0 2.0 
1 3 6 4 8 5 1 3.0 2.0 1.0 3.0 1 1 1 1 40000.0 3 4 1 2 0 1 0 0 0 0 0 0 0 NaN 0 0 NaN 1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1972 3 1 2 1973 3 1 2 1 1 4 2 50000.0 5 4 4 3 5 3 3 3 2 3 3 3 2 3 90 70 70 80 85.0 70 90 60 60 2 3 4 2 29 83 126 2015/7/21 13:24 2 1967 1 0 3 4 NaN 4.0 -2.0 2000 1 NaN 120.0 0 1 1 0 0 0 0 0 0 NaN 160 122 4 4 5 1 1.0 2 2 2 5 1 3 1 4 4 3 5 4 4 2 3 5 5 5 3 4 2 2.0 5.0 2 4 5 4 6 3 2 NaN NaN NaN NaN 1 1 2 2 8000.0 3 3 1 2 0 1 0 0 0 0 0 0 0 NaN 0 2 1.0 3 1990.0 1968.0 1990.0 3.0 1.0 1.0 6000.0 3.0 NaN NaN -2 1 1 2 -2 1 1 2 2 1 4 2 80000.0 3 3 3 3 4 3 3 3 3 3 -8 3 1 4 90 80 75 79 80.0 90 90 90 75 3 4 5 2 10 28 51 2015/7/25 17:33 2 1943 1 1 1 3 NaN 4.0 1959.0 6420 1 NaN 78.0 0 0 0 1 0 0 0 0 0 NaN 163 170 4 4 4 1 2.0 2 1 1 5 1 1 1 5 2 4 5 4 5 1 1 5 5 5 2 4 4 1.0 6.0 1 4 5 5 7 2 4 NaN NaN NaN NaN 2 2 2 2 12000.0 3 3 1 1 0 1 0 0 0 0 0 0 0 NaN 1 4 0.0 7 1960.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN -2 14 1 2 -2 1 1 2 2 1 3 2 10000.0 3 3 4 3 5 3 3 5 4 3 3 3 2 3 100 90 70 80 80.0 90 90 80 80 4 5 4 1 7 18 36 2015/8/10 9:50 2 1994 1 1 1 12 NaN 1.0 2014.0 -1 2 NaN 70.0 0 0 0 0 1 0 0 0 0 NaN 165 110 5 5 3 2 3.0 1 3 4 2 5 5 3 3 3 2 4 4 3 5 2 5 5 1 4 3 4 7.0 5.0 3 2 1 1 1 4 6 NaN NaN NaN NaN 1 2 2 2 -2.0 4 3 1 1 0 1 0 0 0 0 0 0 0 NaN 0 0 NaN 1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1970 6 1 10 1972 4 1 15 3 2 3 -8 200000.0 4 3 3 3 5 5 3 4 3 3 3 3 2 2 50 50 50 50 50.0 50 50 50 50
train. info( verbose= True , show_counts= True )
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8000 entries, 0 to 7999
Data columns (total 140 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 id 8000 non-null int64
1 happiness 8000 non-null int64
2 survey_type 8000 non-null int64
3 province 8000 non-null int64
4 city 8000 non-null int64
5 county 8000 non-null int64
6 survey_time 8000 non-null object
7 gender 8000 non-null int64
8 birth 8000 non-null int64
9 nationality 8000 non-null int64
10 religion 8000 non-null int64
11 religion_freq 8000 non-null int64
12 edu 8000 non-null int64
13 edu_other 3 non-null object
14 edu_status 6880 non-null float64
15 edu_yr 6028 non-null float64
16 income 8000 non-null int64
17 political 8000 non-null int64
18 join_party 824 non-null float64
19 floor_area 8000 non-null float64
20 property_0 8000 non-null int64
21 property_1 8000 non-null int64
22 property_2 8000 non-null int64
23 property_3 8000 non-null int64
24 property_4 8000 non-null int64
25 property_5 8000 non-null int64
26 property_6 8000 non-null int64
27 property_7 8000 non-null int64
28 property_8 8000 non-null int64
29 property_other 66 non-null object
30 height_cm 8000 non-null int64
31 weight_jin 8000 non-null int64
32 health 8000 non-null int64
33 health_problem 8000 non-null int64
34 depression 8000 non-null int64
35 hukou 8000 non-null int64
36 hukou_loc 7996 non-null float64
37 media_1 8000 non-null int64
38 media_2 8000 non-null int64
39 media_3 8000 non-null int64
40 media_4 8000 non-null int64
41 media_5 8000 non-null int64
42 media_6 8000 non-null int64
43 leisure_1 8000 non-null int64
44 leisure_2 8000 non-null int64
45 leisure_3 8000 non-null int64
46 leisure_4 8000 non-null int64
47 leisure_5 8000 non-null int64
48 leisure_6 8000 non-null int64
49 leisure_7 8000 non-null int64
50 leisure_8 8000 non-null int64
51 leisure_9 8000 non-null int64
52 leisure_10 8000 non-null int64
53 leisure_11 8000 non-null int64
54 leisure_12 8000 non-null int64
55 socialize 8000 non-null int64
56 relax 8000 non-null int64
57 learn 8000 non-null int64
58 social_neighbor 7204 non-null float64
59 social_friend 7204 non-null float64
60 socia_outing 8000 non-null int64
61 equity 8000 non-null int64
62 class 8000 non-null int64
63 class_10_before 8000 non-null int64
64 class_10_after 8000 non-null int64
65 class_14 8000 non-null int64
66 work_exper 8000 non-null int64
67 work_status 2951 non-null float64
68 work_yr 2951 non-null float64
69 work_type 2951 non-null float64
70 work_manage 2951 non-null float64
71 insur_1 8000 non-null int64
72 insur_2 8000 non-null int64
73 insur_3 8000 non-null int64
74 insur_4 8000 non-null int64
75 family_income 7999 non-null float64
76 family_m 8000 non-null int64
77 family_status 8000 non-null int64
78 house 8000 non-null int64
79 car 8000 non-null int64
80 invest_0 8000 non-null int64
81 invest_1 8000 non-null int64
82 invest_2 8000 non-null int64
83 invest_3 8000 non-null int64
84 invest_4 8000 non-null int64
85 invest_5 8000 non-null int64
86 invest_6 8000 non-null int64
87 invest_7 8000 non-null int64
88 invest_8 8000 non-null int64
89 invest_other 29 non-null object
90 son 8000 non-null int64
91 daughter 8000 non-null int64
92 minor_child 6934 non-null float64
93 marital 8000 non-null int64
94 marital_1st 7172 non-null float64
95 s_birth 6282 non-null float64
96 marital_now 6230 non-null float64
97 s_edu 6282 non-null float64
98 s_political 6282 non-null float64
99 s_hukou 6282 non-null float64
100 s_income 6282 non-null float64
101 s_work_exper 6282 non-null float64
102 s_work_status 2565 non-null float64
103 s_work_type 2565 non-null float64
104 f_birth 8000 non-null int64
105 f_edu 8000 non-null int64
106 f_political 8000 non-null int64
107 f_work_14 8000 non-null int64
108 m_birth 8000 non-null int64
109 m_edu 8000 non-null int64
110 m_political 8000 non-null int64
111 m_work_14 8000 non-null int64
112 status_peer 8000 non-null int64
113 status_3_before 8000 non-null int64
114 view 8000 non-null int64
115 inc_ability 8000 non-null int64
116 inc_exp 8000 non-null float64
117 trust_1 8000 non-null int64
118 trust_2 8000 non-null int64
119 trust_3 8000 non-null int64
120 trust_4 8000 non-null int64
121 trust_5 8000 non-null int64
122 trust_6 8000 non-null int64
123 trust_7 8000 non-null int64
124 trust_8 8000 non-null int64
125 trust_9 8000 non-null int64
126 trust_10 8000 non-null int64
127 trust_11 8000 non-null int64
128 trust_12 8000 non-null int64
129 trust_13 8000 non-null int64
130 neighbor_familiarity 8000 non-null int64
131 public_service_1 8000 non-null int64
132 public_service_2 8000 non-null int64
133 public_service_3 8000 non-null int64
134 public_service_4 8000 non-null int64
135 public_service_5 8000 non-null float64
136 public_service_6 8000 non-null int64
137 public_service_7 8000 non-null int64
138 public_service_8 8000 non-null int64
139 public_service_9 8000 non-null int64
dtypes: float64(25), int64(111), object(4)
memory usage: 8.5+ MB
y_train_= train[ "happiness" ]
y_train_. value_counts( )
happiness
4 4818
5 1410
3 1159
2 497
1 104
-8 12
Name: count, dtype: int64
# -8 is a non-answer code (12 rows per the value counts above); recode it
# to 3 — presumably chosen as a middle happiness level — TODO confirm.
y_train_= y_train_. map ( lambda x: 3 if x== - 8 else x)
# Shift labels from 1..5 down to 0..4.
y_train_= y_train_. map ( lambda x: x- 1 )
# Stack train and test row-wise so all feature engineering below is
# applied once to both; ignore_index gives a clean 0..n-1 positional index
# that the positional re-split later relies on.
data = pd. concat( [ train, test] , axis= 0 , ignore_index= True )
data. shape
(10968, 140)
4.数据处理
# Parse the raw survey timestamp (e.g. "2015/8/4 14:18") and derive
# simple calendar features from it.
data[ 'survey_time' ] = pd. to_datetime( data[ 'survey_time' ] , format = '%Y/%m/%d %H:%M' )
data[ "weekday" ] = data[ "survey_time" ] . dt. weekday
data[ "year" ] = data[ "survey_time" ] . dt. year
data[ "quarter" ] = data[ "survey_time" ] . dt. quarter
data[ "hour" ] = data[ "survey_time" ] . dt. hour
data[ "month" ] = data[ "survey_time" ] . dt. month
def hour_cut(x):
    """Bucket an hour of day into one of 7 coarse time-of-day bins.

    Bin edges (left-closed): [0,6)->0, [6,8)->1, [8,12)->2, [12,14)->3,
    [14,18)->4, [18,21)->5, [21,24)->6. Values outside [0, 24) yield
    None, matching the original chain's implicit fall-through (dt.hour
    never produces such values, so this never happens in practice).
    """
    if not 0 <= x < 24:
        return None
    for bucket, upper in enumerate((6, 8, 12, 14, 18, 21, 24)):
        if x < upper:
            return bucket
# Bucket the interview hour into coarse time-of-day bins (see hour_cut above).
data[ "hour_cut" ] = data[ "hour" ] . map ( hour_cut)
# Respondent age at survey time.
data[ "survey_age" ] = data[ "year" ] - data[ "birth" ]
# Shift the target copy inside `data` to 0-based; inert in practice since
# the column is dropped two lines below before modeling.
data[ "happiness" ] = data[ "happiness" ] . map ( lambda x: x- 1 )
# Free-text column with only 3 non-null values (see info() above).
data= data. drop( [ "edu_other" ] , axis= 1 )
# The target must not leak into the feature matrix.
data= data. drop( [ "happiness" ] , axis= 1 )
# Raw timestamp has been replaced by the derived calendar features.
data= data. drop( [ "survey_time" ] , axis= 1 )
# Mostly-null year column (824 non-null) -> presence indicator.
data[ "join_party" ] = data[ "join_party" ] . map ( lambda x: 0 if pd. isnull( x) else 1 )
def birth_split(x):
    """Map a birth year to a decade bucket 0-7.

    [1920,1930] -> 0, (1930,1940] -> 1, ..., (1990,2000] -> 7.
    Years outside [1920, 2000] yield None (NaN after .map), matching the
    original chain's implicit fall-through.
    """
    if not 1920 <= x <= 2000:
        return None
    if x <= 1930:
        return 0
    # Each subsequent bucket is a half-open decade (1930, 1940], ...;
    # ceiling division reproduces the original branch chain exactly.
    return -(-(x - 1930) // 10)
data[ "birth_s" ] = data[ "birth" ] . map ( birth_split)
def income_cut(x):
    """Bucket an income value into 6 ordinal bins.

    Bins: x < 0 -> 0, [0, 1200) -> 1, [1200, 10000] -> 2,
    (10000, 24000) -> 3, [24000, 40000) -> 4, [40000, inf) -> 5.

    Bug fix: the original branch boundaries had gaps — x == 1200 matched
    neither ``0 <= x < 1200`` nor ``1200 < x <= 10000``, and x == 24000
    matched neither ``10000 < x < 24000`` nor ``24000 < x < 40000`` — so
    both values fell through and became NaN. The chained guards below are
    contiguous, assigning 1200 to bin 2 and 24000 to bin 4; all other
    inputs map exactly as before.
    """
    if x < 0:
        return 0
    if x < 1200:
        return 1
    if x <= 10000:
        return 2
    if x < 24000:
        return 3
    if x < 40000:
        return 4
    return 5
# Bucket income (see income_cut above).
data[ "income_cut" ] = data[ "income" ] . map ( income_cut)
# --- Missing-value handling ---------------------------------------------
# Strategy: sentinel category (5 / -2 / 8) where the codebook presumably
# reserves one, 0 for "not applicable" (no job / no spouse), and presence
# indicators for the sparse free-text columns. The specific fill values
# look codebook-driven — TODO confirm against the survey documentation.
data[ "edu_status" ] = data[ "edu_status" ] . fillna( 5 )
data[ "edu_yr" ] = data[ "edu_yr" ] . fillna( - 2 )
# Sparse free-text column (66 non-null) -> presence indicator.
data[ "property_other" ] = data[ "property_other" ] . map ( lambda x: 0 if pd. isnull( x) else 1 )
# Only 4 rows missing (see info() above); fill with the most common code 1.
data[ "hukou_loc" ] = data[ "hukou_loc" ] . fillna( 1 )
data[ "social_neighbor" ] = data[ "social_neighbor" ] . fillna( 8 )
data[ "social_friend" ] = data[ "social_friend" ] . fillna( 8 )
# Work columns are null for respondents without the matching work history;
# 0 marks "not applicable".
data[ "work_status" ] = data[ "work_status" ] . fillna( 0 )
data[ "work_yr" ] = data[ "work_yr" ] . fillna( 0 )
data[ "work_type" ] = data[ "work_type" ] . fillna( 0 )
data[ "work_manage" ] = data[ "work_manage" ] . fillna( 0 )
data[ "family_income" ] = data[ "family_income" ] . fillna( - 2 )
# Sparse free-text column (29 non-null) -> presence indicator.
data[ "invest_other" ] = data[ "invest_other" ] . map ( lambda x: 0 if pd. isnull( x) else 1 )
data[ "minor_child" ] = data[ "minor_child" ] . fillna( 0 )
# Marriage/spouse (s_*) columns are null for never-married respondents;
# 0 marks "no spouse".
data[ "marital_1st" ] = data[ "marital_1st" ] . fillna( 0 )
data[ "s_birth" ] = data[ "s_birth" ] . fillna( 0 )
data[ "marital_now" ] = data[ "marital_now" ] . fillna( 0 )
data[ "s_edu" ] = data[ "s_edu" ] . fillna( 0 )
data[ "s_political" ] = data[ "s_political" ] . fillna( 0 )
data[ "s_hukou" ] = data[ "s_hukou" ] . fillna( 0 )
data[ "s_income" ] = data[ "s_income" ] . fillna( 0 )
data[ "s_work_exper" ] = data[ "s_work_exper" ] . fillna( 0 )
data[ "s_work_status" ] = data[ "s_work_status" ] . fillna( 0 )
data[ "s_work_type" ] = data[ "s_work_type" ] . fillna( 0 )
# Row identifier carries no signal.
data= data. drop( [ "id" ] , axis= 1 )
# Split the combined frame back into train/test by row position; the
# earlier concat used ignore_index=True, so the first len(train) rows are
# exactly the training rows.
X_train_ = data[ : train. shape[ 0 ] ]
X_test_ = data[ train. shape[ 0 ] : ]
target_column = 'happiness'
feature_columns= list ( X_test_. columns)
feature_columns
['survey_type',
'province',
'city',
'county',
'gender',
'birth',
'nationality',
'religion',
'religion_freq',
'edu',
'edu_status',
'edu_yr',
'income',
'political',
'join_party',
'floor_area',
'property_0',
'property_1',
'property_2',
'property_3',
'property_4',
'property_5',
'property_6',
'property_7',
'property_8',
'property_other',
'height_cm',
'weight_jin',
'health',
'health_problem',
'depression',
'hukou',
'hukou_loc',
'media_1',
'media_2',
'media_3',
'media_4',
'media_5',
'media_6',
'leisure_1',
'leisure_2',
'leisure_3',
'leisure_4',
'leisure_5',
'leisure_6',
'leisure_7',
'leisure_8',
'leisure_9',
'leisure_10',
'leisure_11',
'leisure_12',
'socialize',
'relax',
'learn',
'social_neighbor',
'social_friend',
'socia_outing',
'equity',
'class',
'class_10_before',
'class_10_after',
'class_14',
'work_exper',
'work_status',
'work_yr',
'work_type',
'work_manage',
'insur_1',
'insur_2',
'insur_3',
'insur_4',
'family_income',
'family_m',
'family_status',
'house',
'car',
'invest_0',
'invest_1',
'invest_2',
'invest_3',
'invest_4',
'invest_5',
'invest_6',
'invest_7',
'invest_8',
'invest_other',
'son',
'daughter',
'minor_child',
'marital',
'marital_1st',
's_birth',
'marital_now',
's_edu',
's_political',
's_hukou',
's_income',
's_work_exper',
's_work_status',
's_work_type',
'f_birth',
'f_edu',
'f_political',
'f_work_14',
'm_birth',
'm_edu',
'm_political',
'm_work_14',
'status_peer',
'status_3_before',
'view',
'inc_ability',
'inc_exp',
'trust_1',
'trust_2',
'trust_3',
'trust_4',
'trust_5',
'trust_6',
'trust_7',
'trust_8',
'trust_9',
'trust_10',
'trust_11',
'trust_12',
'trust_13',
'neighbor_familiarity',
'public_service_1',
'public_service_2',
'public_service_3',
'public_service_4',
'public_service_5',
'public_service_6',
'public_service_7',
'public_service_8',
'public_service_9',
'weekday',
'year',
'quarter',
'hour',
'month',
'hour_cut',
'survey_age',
'birth_s',
'income_cut']
# Materialize plain numpy arrays for the boosting libraries below.
X_train = np. array( X_train_)
y_train = np. array( y_train_)
X_test = np. array( X_test_)
X_train. shape
(8000, 145)
y_train. shape
(8000,)
X_test. shape
(2968, 145)
def myFeval(preds, xgbtrain):
    """Custom xgboost evaluation: ('myFeval', MSE(labels, preds)).

    `xgbtrain` is the evaluation DMatrix; its labels are compared against
    the booster's predictions with plain mean squared error.
    """
    labels = xgbtrain.get_label()
    return 'myFeval', mean_squared_error(labels, preds)
# XGBoost 5-fold out-of-fold (OOF) training: oof_xgb collects each fold's
# validation predictions for an honest CV score; predictions_xgb averages
# the per-fold test predictions.
xgb_params = {
    "booster": 'gbtree',
    'eta': 0.005,
    'max_depth': 5,
    'subsample': 0.7,
    'colsample_bytree': 0.8,
    # 'reg:linear' is deprecated (see the runtime warning in the log) and
    # is an alias of 'reg:squarederror'; use the current name.
    'objective': 'reg:squarederror',
    'eval_metric': 'rmse',
    'nthread': 8,
}
folds = KFold(n_splits=5, shuffle=True, random_state=2018)
oof_xgb = np.zeros(len(train))
predictions_xgb = np.zeros(len(test))
for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train, y_train)):
    print("fold n°{}".format(fold_ + 1))
    trn_data = xgb.DMatrix(X_train[trn_idx], y_train[trn_idx])
    val_data = xgb.DMatrix(X_train[val_idx], y_train[val_idx])
    watchlist = [(trn_data, 'train'), (val_data, 'valid_data')]
    # `feval` is deprecated in favour of `custom_metric` (per the warning
    # in the log); the two only differ when a custom *objective* is also
    # used, which is not the case here.
    clf = xgb.train(dtrain=trn_data, num_boost_round=20000, evals=watchlist,
                    early_stopping_rounds=200, verbose_eval=100,
                    params=xgb_params, custom_metric=myFeval)
    oof_xgb[val_idx] = clf.predict(xgb.DMatrix(X_train[val_idx]))
    # Average the fold models' test predictions.
    predictions_xgb += clf.predict(xgb.DMatrix(X_test)) / folds.n_splits
print("CV score: {:<8.8f}".format(mean_squared_error(oof_xgb, y_train_)))
fold n°1
[0] train-rmse:0.82393 train-myFeval:0.67886 valid_data-rmse:0.79253 valid_data-myFeval:0.62810
D:\anaconda3\Lib\site-packages\xgboost\training.py:38: UserWarning: `feval` is deprecated, use `custom_metric` instead. They have different behavior when custom objective is also used.See https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html for details on the `custom_metric`.
warnings.warn(
D:\anaconda3\Lib\site-packages\xgboost\core.py:160: UserWarning: [11:59:24] WARNING: C:\b\abs_0fh_d4x2ng\croot\xgboost-split_1713973188995\work\cpp_src\src\objective\regression_obj.cu:209: reg:linear is now deprecated in favor of reg:squarederror.
warnings.warn(smsg, UserWarning)
[100] train-rmse:0.74464 train-myFeval:0.55449 valid_data-rmse:0.73502 valid_data-myFeval:0.54025
[200] train-rmse:0.69770 train-myFeval:0.48679 valid_data-rmse:0.70656 valid_data-myFeval:0.49923
[300] train-rmse:0.66722 train-myFeval:0.44518 valid_data-rmse:0.69159 valid_data-myFeval:0.47830
[400] train-rmse:0.64466 train-myFeval:0.41559 valid_data-rmse:0.68242 valid_data-myFeval:0.46570
[500] train-rmse:0.62700 train-myFeval:0.39312 valid_data-rmse:0.67782 valid_data-myFeval:0.45944
[600] train-rmse:0.61203 train-myFeval:0.37459 valid_data-rmse:0.67388 valid_data-myFeval:0.45411
[700] train-rmse:0.59866 train-myFeval:0.35840 valid_data-rmse:0.67140 valid_data-myFeval:0.45078
[800] train-rmse:0.58712 train-myFeval:0.34471 valid_data-rmse:0.66950 valid_data-myFeval:0.44823
[900] train-rmse:0.57628 train-myFeval:0.33210 valid_data-rmse:0.66794 valid_data-myFeval:0.44614
[1000] train-rmse:0.56658 train-myFeval:0.32101 valid_data-rmse:0.66699 valid_data-myFeval:0.44487
[1100] train-rmse:0.55719 train-myFeval:0.31046 valid_data-rmse:0.66613 valid_data-myFeval:0.44373
[1200] train-rmse:0.54873 train-myFeval:0.30110 valid_data-rmse:0.66540 valid_data-myFeval:0.44275
[1300] train-rmse:0.54048 train-myFeval:0.29212 valid_data-rmse:0.66490 valid_data-myFeval:0.44209
[1400] train-rmse:0.53260 train-myFeval:0.28366 valid_data-rmse:0.66446 valid_data-myFeval:0.44150
[1500] train-rmse:0.52540 train-myFeval:0.27605 valid_data-rmse:0.66400 valid_data-myFeval:0.44089
[1600] train-rmse:0.51833 train-myFeval:0.26866 valid_data-rmse:0.66383 valid_data-myFeval:0.44067
[1700] train-rmse:0.51128 train-myFeval:0.26141 valid_data-rmse:0.66348 valid_data-myFeval:0.44020
[1800] train-rmse:0.50453 train-myFeval:0.25455 valid_data-rmse:0.66317 valid_data-myFeval:0.43979
[1900] train-rmse:0.49817 train-myFeval:0.24817 valid_data-rmse:0.66318 valid_data-myFeval:0.43981
[2000] train-rmse:0.49195 train-myFeval:0.24201 valid_data-rmse:0.66303 valid_data-myFeval:0.43961
[2100] train-rmse:0.48594 train-myFeval:0.23614 valid_data-rmse:0.66296 valid_data-myFeval:0.43952
[2200] train-rmse:0.48036 train-myFeval:0.23075 valid_data-rmse:0.66295 valid_data-myFeval:0.43951
[2300] train-rmse:0.47487 train-myFeval:0.22550 valid_data-rmse:0.66298 valid_data-myFeval:0.43955
[2318] train-rmse:0.47389 train-myFeval:0.22457 valid_data-rmse:0.66292 valid_data-myFeval:0.43946
fold n°2
[0] train-rmse:0.81393 train-myFeval:0.66248 valid_data-rmse:0.83294 valid_data-myFeval:0.69378
D:\anaconda3\Lib\site-packages\xgboost\training.py:38: UserWarning: `feval` is deprecated, use `custom_metric` instead. They have different behavior when custom objective is also used.See https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html for details on the `custom_metric`.
warnings.warn(
D:\anaconda3\Lib\site-packages\xgboost\core.py:160: UserWarning: [11:59:50] WARNING: C:\b\abs_0fh_d4x2ng\croot\xgboost-split_1713973188995\work\cpp_src\src\objective\regression_obj.cu:209: reg:linear is now deprecated in favor of reg:squarederror.
warnings.warn(smsg, UserWarning)
[100] train-rmse:0.73830 train-myFeval:0.54508 valid_data-rmse:0.76603 valid_data-myFeval:0.58681
[200] train-rmse:0.69270 train-myFeval:0.47983 valid_data-rmse:0.73140 valid_data-myFeval:0.53495
[300] train-rmse:0.66250 train-myFeval:0.43891 valid_data-rmse:0.71361 valid_data-myFeval:0.50923
[400] train-rmse:0.64021 train-myFeval:0.40987 valid_data-rmse:0.70267 valid_data-myFeval:0.49374
[500] train-rmse:0.62234 train-myFeval:0.38731 valid_data-rmse:0.69585 valid_data-myFeval:0.48420
[600] train-rmse:0.60709 train-myFeval:0.36855 valid_data-rmse:0.69103 valid_data-myFeval:0.47752
[700] train-rmse:0.59430 train-myFeval:0.35319 valid_data-rmse:0.68759 valid_data-myFeval:0.47278
[800] train-rmse:0.58285 train-myFeval:0.33972 valid_data-rmse:0.68556 valid_data-myFeval:0.46999
[900] train-rmse:0.57242 train-myFeval:0.32767 valid_data-rmse:0.68383 valid_data-myFeval:0.46763
[1000] train-rmse:0.56302 train-myFeval:0.31699 valid_data-rmse:0.68251 valid_data-myFeval:0.46582
[1100] train-rmse:0.55466 train-myFeval:0.30765 valid_data-rmse:0.68151 valid_data-myFeval:0.46446
[1200] train-rmse:0.54632 train-myFeval:0.29847 valid_data-rmse:0.68079 valid_data-myFeval:0.46347
[1300] train-rmse:0.53884 train-myFeval:0.29035 valid_data-rmse:0.68027 valid_data-myFeval:0.46277
[1400] train-rmse:0.53151 train-myFeval:0.28250 valid_data-rmse:0.67981 valid_data-myFeval:0.46214
[1500] train-rmse:0.52466 train-myFeval:0.27526 valid_data-rmse:0.67950 valid_data-myFeval:0.46171
[1600] train-rmse:0.51782 train-myFeval:0.26813 valid_data-rmse:0.67900 valid_data-myFeval:0.46104
[1700] train-rmse:0.51134 train-myFeval:0.26147 valid_data-rmse:0.67886 valid_data-myFeval:0.46085
[1800] train-rmse:0.50494 train-myFeval:0.25496 valid_data-rmse:0.67881 valid_data-myFeval:0.46078
[1900] train-rmse:0.49861 train-myFeval:0.24862 valid_data-rmse:0.67856 valid_data-myFeval:0.46044
[2000] train-rmse:0.49241 train-myFeval:0.24247 valid_data-rmse:0.67838 valid_data-myFeval:0.46019
[2100] train-rmse:0.48635 train-myFeval:0.23653 valid_data-rmse:0.67825 valid_data-myFeval:0.46002
[2200] train-rmse:0.48057 train-myFeval:0.23094 valid_data-rmse:0.67806 valid_data-myFeval:0.45976
[2300] train-rmse:0.47491 train-myFeval:0.22554 valid_data-rmse:0.67805 valid_data-myFeval:0.45975
[2400] train-rmse:0.46918 train-myFeval:0.22013 valid_data-rmse:0.67793 valid_data-myFeval:0.45959
[2500] train-rmse:0.46408 train-myFeval:0.21537 valid_data-rmse:0.67802 valid_data-myFeval:0.45971
[2566] train-rmse:0.46042 train-myFeval:0.21199 valid_data-rmse:0.67809 valid_data-myFeval:0.45980
fold n°3
[0] train-rmse:0.81545 train-myFeval:0.66495 valid_data-rmse:0.82699 valid_data-myFeval:0.68391
D:\anaconda3\Lib\site-packages\xgboost\training.py:38: UserWarning: `feval` is deprecated, use `custom_metric` instead. They have different behavior when custom objective is also used.See https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html for details on the `custom_metric`.
warnings.warn(
D:\anaconda3\Lib\site-packages\xgboost\core.py:160: UserWarning: [12:00:19] WARNING: C:\b\abs_0fh_d4x2ng\croot\xgboost-split_1713973188995\work\cpp_src\src\objective\regression_obj.cu:209: reg:linear is now deprecated in favor of reg:squarederror.
warnings.warn(smsg, UserWarning)
[100] train-rmse:0.73843 train-myFeval:0.54527 valid_data-rmse:0.76373 valid_data-myFeval:0.58328
[200] train-rmse:0.69179 train-myFeval:0.47858 valid_data-rmse:0.73156 valid_data-myFeval:0.53517
[300] train-rmse:0.66140 train-myFeval:0.43745 valid_data-rmse:0.71477 valid_data-myFeval:0.51090
[400] train-rmse:0.63884 train-myFeval:0.40812 valid_data-rmse:0.70460 valid_data-myFeval:0.49647
[500] train-rmse:0.62062 train-myFeval:0.38517 valid_data-rmse:0.69778 valid_data-myFeval:0.48690
[600] train-rmse:0.60568 train-myFeval:0.36685 valid_data-rmse:0.69337 valid_data-myFeval:0.48076
[700] train-rmse:0.59328 train-myFeval:0.35199 valid_data-rmse:0.69052 valid_data-myFeval:0.47682
[800] train-rmse:0.58212 train-myFeval:0.33886 valid_data-rmse:0.68814 valid_data-myFeval:0.47354
[900] train-rmse:0.57236 train-myFeval:0.32760 valid_data-rmse:0.68634 valid_data-myFeval:0.47106
[1000] train-rmse:0.56341 train-myFeval:0.31743 valid_data-rmse:0.68517 valid_data-myFeval:0.46945
[1100] train-rmse:0.55465 train-myFeval:0.30763 valid_data-rmse:0.68417 valid_data-myFeval:0.46809
[1200] train-rmse:0.54672 train-myFeval:0.29890 valid_data-rmse:0.68318 valid_data-myFeval:0.46674
[1300] train-rmse:0.53884 train-myFeval:0.29035 valid_data-rmse:0.68216 valid_data-myFeval:0.46534
[1400] train-rmse:0.53153 train-myFeval:0.28252 valid_data-rmse:0.68167 valid_data-myFeval:0.46468
[1500] train-rmse:0.52451 train-myFeval:0.27511 valid_data-rmse:0.68141 valid_data-myFeval:0.46432
[1600] train-rmse:0.51747 train-myFeval:0.26777 valid_data-rmse:0.68077 valid_data-myFeval:0.46345
[1700] train-rmse:0.51073 train-myFeval:0.26085 valid_data-rmse:0.68039 valid_data-myFeval:0.46294
[1800] train-rmse:0.50427 train-myFeval:0.25428 valid_data-rmse:0.68006 valid_data-myFeval:0.46248
[1900] train-rmse:0.49816 train-myFeval:0.24816 valid_data-rmse:0.67991 valid_data-myFeval:0.46228
[2000] train-rmse:0.49182 train-myFeval:0.24189 valid_data-rmse:0.67962 valid_data-myFeval:0.46188
[2100] train-rmse:0.48562 train-myFeval:0.23583 valid_data-rmse:0.67944 valid_data-myFeval:0.46163
[2200] train-rmse:0.47992 train-myFeval:0.23033 valid_data-rmse:0.67926 valid_data-myFeval:0.46140
[2300] train-rmse:0.47411 train-myFeval:0.22478 valid_data-rmse:0.67922 valid_data-myFeval:0.46135
[2358] train-rmse:0.47054 train-myFeval:0.22141 valid_data-rmse:0.67929 valid_data-myFeval:0.46143
fold n°4
[0] train-rmse:0.81419 train-myFeval:0.66291 valid_data-rmse:0.83171 valid_data-myFeval:0.69174
D:\anaconda3\Lib\site-packages\xgboost\training.py:38: UserWarning: `feval` is deprecated, use `custom_metric` instead. They have different behavior when custom objective is also used.See https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html for details on the `custom_metric`.
warnings.warn(
D:\anaconda3\Lib\site-packages\xgboost\core.py:160: UserWarning: [12:00:47] WARNING: C:\b\abs_0fh_d4x2ng\croot\xgboost-split_1713973188995\work\cpp_src\src\objective\regression_obj.cu:209: reg:linear is now deprecated in favor of reg:squarederror.
warnings.warn(smsg, UserWarning)
[100] train-rmse:0.73899 train-myFeval:0.54611 valid_data-rmse:0.76583 valid_data-myFeval:0.58649
[200] train-rmse:0.69395 train-myFeval:0.48157 valid_data-rmse:0.73003 valid_data-myFeval:0.53294
[300] train-rmse:0.66406 train-myFeval:0.44098 valid_data-rmse:0.71098 valid_data-myFeval:0.50549
[400] train-rmse:0.64155 train-myFeval:0.41158 valid_data-rmse:0.69926 valid_data-myFeval:0.48896
[500] train-rmse:0.62370 train-myFeval:0.38900 valid_data-rmse:0.69177 valid_data-myFeval:0.47854
[600] train-rmse:0.60857 train-myFeval:0.37036 valid_data-rmse:0.68674 valid_data-myFeval:0.47161
[700] train-rmse:0.59574 train-myFeval:0.35490 valid_data-rmse:0.68317 valid_data-myFeval:0.46672
[800] train-rmse:0.58467 train-myFeval:0.34184 valid_data-rmse:0.68054 valid_data-myFeval:0.46314
[900] train-rmse:0.57457 train-myFeval:0.33013 valid_data-rmse:0.67864 valid_data-myFeval:0.46055
[1000] train-rmse:0.56524 train-myFeval:0.31949 valid_data-rmse:0.67731 valid_data-myFeval:0.45874
[1100] train-rmse:0.55640 train-myFeval:0.30958 valid_data-rmse:0.67607 valid_data-myFeval:0.45707
[1200] train-rmse:0.54859 train-myFeval:0.30095 valid_data-rmse:0.67567 valid_data-myFeval:0.45652
[1300] train-rmse:0.54050 train-myFeval:0.29214 valid_data-rmse:0.67497 valid_data-myFeval:0.45558
[1400] train-rmse:0.53326 train-myFeval:0.28437 valid_data-rmse:0.67459 valid_data-myFeval:0.45508
[1500] train-rmse:0.52619 train-myFeval:0.27688 valid_data-rmse:0.67447 valid_data-myFeval:0.45491
[1600] train-rmse:0.51927 train-myFeval:0.26964 valid_data-rmse:0.67424 valid_data-myFeval:0.45460
[1700] train-rmse:0.51242 train-myFeval:0.26257 valid_data-rmse:0.67422 valid_data-myFeval:0.45457
[1800] train-rmse:0.50599 train-myFeval:0.25602 valid_data-rmse:0.67378 valid_data-myFeval:0.45397
[1900] train-rmse:0.49975 train-myFeval:0.24975 valid_data-rmse:0.67368 valid_data-myFeval:0.45384
[2000] train-rmse:0.49370 train-myFeval:0.24374 valid_data-rmse:0.67376 valid_data-myFeval:0.45395
[2068] train-rmse:0.48945 train-myFeval:0.23956 valid_data-rmse:0.67375 valid_data-myFeval:0.45393
fold n°5
[0] train-rmse:0.82091 train-myFeval:0.67389 valid_data-rmse:0.80489 valid_data-myFeval:0.64785
D:\anaconda3\Lib\site-packages\xgboost\training.py:38: UserWarning: `feval` is deprecated, use `custom_metric` instead. They have different behavior when custom objective is also used.See https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html for details on the `custom_metric`.
warnings.warn(
D:\anaconda3\Lib\site-packages\xgboost\core.py:160: UserWarning: [12:01:12] WARNING: C:\b\abs_0fh_d4x2ng\croot\xgboost-split_1713973188995\work\cpp_src\src\objective\regression_obj.cu:209: reg:linear is now deprecated in favor of reg:squarederror.
warnings.warn(smsg, UserWarning)
[100] train-rmse:0.74096 train-myFeval:0.54903 valid_data-rmse:0.74931 valid_data-myFeval:0.56147
[200] train-rmse:0.69341 train-myFeval:0.48081 valid_data-rmse:0.72129 valid_data-myFeval:0.52026
[300] train-rmse:0.66212 train-myFeval:0.43841 valid_data-rmse:0.70749 valid_data-myFeval:0.50054
[400] train-rmse:0.63931 train-myFeval:0.40871 valid_data-rmse:0.69945 valid_data-myFeval:0.48923
[500] train-rmse:0.62114 train-myFeval:0.38582 valid_data-rmse:0.69414 valid_data-myFeval:0.48183
[600] train-rmse:0.60610 train-myFeval:0.36736 valid_data-rmse:0.69065 valid_data-myFeval:0.47699
[700] train-rmse:0.59310 train-myFeval:0.35177 valid_data-rmse:0.68823 valid_data-myFeval:0.47365
[800] train-rmse:0.58151 train-myFeval:0.33816 valid_data-rmse:0.68631 valid_data-myFeval:0.47102
[900] train-rmse:0.57079 train-myFeval:0.32580 valid_data-rmse:0.68496 valid_data-myFeval:0.46916
[1000] train-rmse:0.56123 train-myFeval:0.31498 valid_data-rmse:0.68392 valid_data-myFeval:0.46774
[1100] train-rmse:0.55244 train-myFeval:0.30519 valid_data-rmse:0.68290 valid_data-myFeval:0.46636
[1200] train-rmse:0.54402 train-myFeval:0.29596 valid_data-rmse:0.68194 valid_data-myFeval:0.46504
[1300] train-rmse:0.53623 train-myFeval:0.28754 valid_data-rmse:0.68147 valid_data-myFeval:0.46440
[1400] train-rmse:0.52838 train-myFeval:0.27918 valid_data-rmse:0.68073 valid_data-myFeval:0.46339
[1500] train-rmse:0.52094 train-myFeval:0.27138 valid_data-rmse:0.68026 valid_data-myFeval:0.46276
[1600] train-rmse:0.51413 train-myFeval:0.26433 valid_data-rmse:0.67992 valid_data-myFeval:0.46229
[1700] train-rmse:0.50725 train-myFeval:0.25731 valid_data-rmse:0.67938 valid_data-myFeval:0.46155
[1800] train-rmse:0.50086 train-myFeval:0.25086 valid_data-rmse:0.67905 valid_data-myFeval:0.46111
[1900] train-rmse:0.49443 train-myFeval:0.24446 valid_data-rmse:0.67880 valid_data-myFeval:0.46078
[2000] train-rmse:0.48809 train-myFeval:0.23823 valid_data-rmse:0.67859 valid_data-myFeval:0.46048
[2100] train-rmse:0.48215 train-myFeval:0.23247 valid_data-rmse:0.67830 valid_data-myFeval:0.46009
[2200] train-rmse:0.47621 train-myFeval:0.22678 valid_data-rmse:0.67795 valid_data-myFeval:0.45962
[2300] train-rmse:0.47052 train-myFeval:0.22139 valid_data-rmse:0.67806 valid_data-myFeval:0.45977
[2398] train-rmse:0.46456 train-myFeval:0.21582 valid_data-rmse:0.67804 valid_data-myFeval:0.45974
CV score: 0.45487181
# LightGBM 5-fold out-of-fold training, mirroring the xgboost loop above.
param = {'boosting_type': 'gbdt',
         'num_leaves': 20,
         'min_data_in_leaf': 20,
         'objective': 'regression',
         'max_depth': 6,
         'learning_rate': 0.01,
         "min_child_samples": 30,
         "feature_fraction": 0.8,
         "bagging_freq": 1,
         "bagging_fraction": 0.8,
         "bagging_seed": 11,
         "metric": 'mse',
         "lambda_l1": 0.1,
         "verbosity": -1}
folds = KFold(n_splits=5, shuffle=True, random_state=2018)
oof_lgb = np.zeros(len(X_train_))
predictions_lgb = np.zeros(len(X_test_))
num_round = 10000  # hoisted: loop-invariant
for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train, y_train)):
    print("fold n°{}".format(fold_ + 1))
    trn_data = lgb.Dataset(X_train[trn_idx], y_train[trn_idx])
    val_data = lgb.Dataset(X_train[val_idx], y_train[val_idx])
    # Bug fix: without an early-stopping callback, best_iteration is never
    # set, so the num_iteration=clf.best_iteration predictions below
    # silently used all 10000 rounds (overfitting risk). Stop 200 rounds
    # past the best validation score, matching the xgboost loop above.
    clf = lgb.train(param, trn_data, num_round,
                    valid_sets=[trn_data, val_data],
                    callbacks=[lgb.early_stopping(200)])
    oof_lgb[val_idx] = clf.predict(X_train[val_idx], num_iteration=clf.best_iteration)
    # Average the fold models' test predictions.
    predictions_lgb += clf.predict(X_test, num_iteration=clf.best_iteration) / folds.n_splits
print("CV score: {:<8.8f}".format(mean_squared_error(oof_lgb, y_train_)))
fold n°1
fold n°2
fold n°3
fold n°4
fold n°5
CV score: 0.47250842
# CatBoost: 5-fold CV regression, same OOF/test-averaging scheme as the
# LightGBM run, relying on CatBoost's overfitting detector to stop early.
from catboost import Pool, CatBoostRegressor
from sklearn.model_selection import train_test_split

kfolder = KFold(n_splits=5, shuffle=True, random_state=2019)
oof_cb = np.zeros(len(X_train_))
predictions_cb = np.zeros(len(X_test_))

# Fold-invariant parameters, built once outside the loop.
cb_params = {
    'n_estimators': 100000,
    'loss_function': 'RMSE',
    'eval_metric': 'RMSE',
    'learning_rate': 0.05,
    'depth': 5,
    'use_best_model': True,
    'subsample': 0.6,
    'bootstrap_type': 'Bernoulli',
    'reg_lambda': 3,
}

for fold_, (train_index, vali_index) in enumerate(kfolder.split(X_train_, y_train_)):
    print("fold n°{}".format(fold_))
    k_x_train = X_train[train_index]
    k_y_train = y_train[train_index]
    k_x_vali = X_train[vali_index]
    k_y_vali = y_train[vali_index]
    model_cb = CatBoostRegressor(**cb_params)
    model_cb.fit(k_x_train, k_y_train, eval_set=[(k_x_vali, k_y_vali)],
                 verbose=100, early_stopping_rounds=50)
    # BUGFIX: predict()'s ntree_end is the index of the first tree NOT used,
    # so ntree_end=best_iteration_ silently dropped the best tree. With
    # use_best_model=True the fitted model is already shrunk to the best
    # iteration, so plain predict() uses exactly the best model.
    oof_cb[vali_index] = model_cb.predict(k_x_vali)
    predictions_cb += model_cb.predict(X_test_) / kfolder.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_cb, y_train_)))
fold n°0
0: learn: 0.8175871 test: 0.7820939 best: 0.7820939 (0) total: 146ms remaining: 4h 3m
100: learn: 0.6711041 test: 0.6749289 best: 0.6749289 (100) total: 731ms remaining: 12m 2s
200: learn: 0.6410910 test: 0.6688829 best: 0.6686703 (190) total: 1.67s remaining: 13m 47s
300: learn: 0.6130819 test: 0.6669464 best: 0.6668201 (282) total: 2.57s remaining: 14m 10s
400: learn: 0.5895197 test: 0.6666901 best: 0.6663658 (371) total: 3.81s remaining: 15m 45s
500: learn: 0.5684832 test: 0.6657841 best: 0.6654600 (478) total: 4.98s remaining: 16m 29s
Stopped by overfitting detector (50 iterations wait)
bestTest = 0.6654599993
bestIteration = 478
Shrink model to first 479 iterations.
fold n°1
0: learn: 0.8107754 test: 0.8172376 best: 0.8172376 (0) total: 7.99ms remaining: 13m 18s
100: learn: 0.6715406 test: 0.6800052 best: 0.6800052 (100) total: 1.49s remaining: 24m 35s
200: learn: 0.6428284 test: 0.6699391 best: 0.6699391 (200) total: 2.69s remaining: 22m 18s
300: learn: 0.6144500 test: 0.6663790 best: 0.6662390 (298) total: 3.94s remaining: 21m 46s
400: learn: 0.5905343 test: 0.6643743 best: 0.6641256 (388) total: 5.16s remaining: 21m 21s
500: learn: 0.5703917 test: 0.6632232 best: 0.6632137 (497) total: 6.27s remaining: 20m 45s
600: learn: 0.5523517 test: 0.6626011 best: 0.6620170 (579) total: 7.44s remaining: 20m 30s
Stopped by overfitting detector (50 iterations wait)
bestTest = 0.6620170222
bestIteration = 579
Shrink model to first 580 iterations.
fold n°2
0: learn: 0.8046145 test: 0.8370989 best: 0.8370989 (0) total: 39.5ms remaining: 1h 5m 47s
100: learn: 0.6652528 test: 0.7059731 best: 0.7059731 (100) total: 1.26s remaining: 20m 45s
200: learn: 0.6356395 test: 0.6958527 best: 0.6958527 (200) total: 2.57s remaining: 21m 17s
300: learn: 0.6079444 test: 0.6913800 best: 0.6913800 (300) total: 3.91s remaining: 21m 36s
400: learn: 0.5848883 test: 0.6900293 best: 0.6900293 (400) total: 5.16s remaining: 21m 20s
500: learn: 0.5637398 test: 0.6896119 best: 0.6889243 (455) total: 6.35s remaining: 21m 1s
Stopped by overfitting detector (50 iterations wait)
bestTest = 0.6889243403
bestIteration = 455
Shrink model to first 456 iterations.
fold n°3
0: learn: 0.8156897 test: 0.7928103 best: 0.7928103 (0) total: 7.7ms remaining: 12m 50s
100: learn: 0.6666901 test: 0.6886018 best: 0.6886018 (100) total: 1.27s remaining: 20m 59s
200: learn: 0.6349422 test: 0.6834388 best: 0.6834388 (200) total: 2.61s remaining: 21m 35s
300: learn: 0.6054434 test: 0.6814056 best: 0.6806466 (259) total: 3.98s remaining: 22m
Stopped by overfitting detector (50 iterations wait)
bestTest = 0.680646584
bestIteration = 259
Shrink model to first 260 iterations.
fold n°4
0: learn: 0.8073054 test: 0.8273646 best: 0.8273646 (0) total: 7.88ms remaining: 13m 7s
100: learn: 0.6617636 test: 0.7072268 best: 0.7072268 (100) total: 1.46s remaining: 24m 1s
200: learn: 0.6326520 test: 0.6986823 best: 0.6985780 (193) total: 2.87s remaining: 23m 46s
300: learn: 0.6047984 test: 0.6949317 best: 0.6949112 (296) total: 4.16s remaining: 22m 56s
400: learn: 0.5809457 test: 0.6927416 best: 0.6925554 (375) total: 5.45s remaining: 22m 32s
Stopped by overfitting detector (50 iterations wait)
bestTest = 0.6925554216
bestIteration = 375
Shrink model to first 376 iterations.
CV score: 0.45983020
# Stacking layer: feed the three base models' OOF predictions into a
# Bayesian ridge meta-learner over repeated 5-fold CV (5 splits x 2
# repeats = 10 fits); test predictions are averaged over all fits.
from sklearn import linear_model

train_stack = np.vstack([oof_lgb, oof_xgb, oof_cb]).transpose()
test_stack = np.vstack([predictions_lgb, predictions_xgb, predictions_cb]).transpose()

folds_stack = RepeatedKFold(n_splits=5, n_repeats=2, random_state=2018)
oof_stack = np.zeros(train_stack.shape[0])
predictions = np.zeros(test_stack.shape[0])
# Generalized: was a hard-coded "/ 10"; get_n_splits() == n_splits * n_repeats,
# so this stays correct if the CV configuration above changes.
n_fits = folds_stack.get_n_splits()

for fold_, (trn_idx, val_idx) in enumerate(folds_stack.split(train_stack, y_train)):
    print("fold {}".format(fold_))
    trn_data, trn_y = train_stack[trn_idx], y_train[trn_idx]
    val_data, val_y = train_stack[val_idx], y_train[val_idx]
    clf_3 = linear_model.BayesianRidge()
    clf_3.fit(trn_data, trn_y)
    # NOTE(review): with RepeatedKFold the second repeat overwrites the
    # first repeat's OOF values rather than averaging them — confirm intended.
    oof_stack[val_idx] = clf_3.predict(val_data)
    predictions += clf_3.predict(test_stack) / n_fits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_stack, y_train_)))
fold 0
fold 1
fold 2
fold 3
fold 4
fold 5
fold 6
fold 7
fold 8
fold 9
CV score: 0.45369897
# Write the submission file.
# +1 offset: presumably the target was shifted down by one during
# preprocessing — verify against the label-encoding step.
test_sub["happiness"] = [pred + 1 for pred in predictions]
test_sub.to_csv("submit_20240502.csv", index=False)
% matplotlib inline
import numpy as np
import matplotlib. pyplot as plt
from scipy. special import jn
from IPython. display import display, clear_output
import time
# Animated demo (unrelated to the model): plot Bessel functions J_n(x)
# for n = 1..9, redrawing the same figure once per second in the
# notebook output cell.
x = np.linspace(0, 5)
fig, axis = plt.subplots()
axis.set_title("Bessel functions")
for order in range(1, 10):
    time.sleep(1)  # pace the animation at roughly one frame per second
    axis.plot(x, jn(x, order))
    clear_output(wait=True)  # replace the previous frame instead of stacking
    display(fig)
plt.close()