04_机器学习赛事_一起挖掘幸福感

本文介绍了参与机器学习赛事的过程,从数据导入、查看到预处理的详细步骤,旨在探索和挖掘幸福感的相关因素。
摘要由CSDN通过智能技术生成

(image placeholder from the original article — figure omitted)

1. 函数库导入

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import KFold, RepeatedKFold
from scipy import sparse
#显示所有列
pd.set_option('display.max_columns', None)
#显示所有行
pd.set_option('display.max_rows', None)
from datetime import datetime

2. 导入数据

# Load the raw competition data: abbreviated / complete train and test sets
# plus the submission template. The raw CSVs are not UTF-8, hence ISO-8859-1.
_ENC = 'ISO-8859-1'
train_abbr = pd.read_csv("./data/happiness_train_abbr.csv", encoding=_ENC)
train = pd.read_csv("./data/happiness_train_complete.csv", encoding=_ENC)
test_abbr = pd.read_csv("./data/happiness_test_abbr.csv", encoding=_ENC)
test = pd.read_csv("./data/happiness_test_complete.csv", encoding=_ENC)
test_sub = pd.read_csv("./data/happiness_submit.csv", encoding=_ENC)

3. 查看数据

# Inspect dataset sizes: the test set has 2968 rows and 139 columns (no label).
test.shape
(2968, 139)
# Submission template: one row per test sample, columns (id, happiness).
test_sub.shape
(2968, 2)
# Training set: 8000 rows, 140 columns (one extra column: the 'happiness' label).
train.shape
(8000, 140)
# Quick look at the first five rows.
train.head()
idhappinesssurvey_typeprovincecitycountysurvey_timegenderbirthnationalityreligionreligion_freqeduedu_otheredu_statusedu_yrincomepoliticaljoin_partyfloor_areaproperty_0property_1property_2property_3property_4property_5property_6property_7property_8property_otherheight_cmweight_jinhealthhealth_problemdepressionhukouhukou_locmedia_1media_2media_3media_4media_5media_6leisure_1leisure_2leisure_3leisure_4leisure_5leisure_6leisure_7leisure_8leisure_9leisure_10leisure_11leisure_12socializerelaxlearnsocial_neighborsocial_friendsocia_outingequityclassclass_10_beforeclass_10_afterclass_14work_experwork_statuswork_yrwork_typework_manageinsur_1insur_2insur_3insur_4family_incomefamily_mfamily_statushousecarinvest_0invest_1invest_2invest_3invest_4invest_5invest_6invest_7invest_8invest_othersondaughterminor_childmaritalmarital_1sts_birthmarital_nows_edus_politicals_hukous_incomes_work_expers_work_statuss_work_typef_birthf_eduf_politicalf_work_14m_birthm_edum_politicalm_work_14status_peerstatus_3_beforeviewinc_abilityinc_exptrust_1trust_2trust_3trust_4trust_5trust_6trust_7trust_8trust_9trust_10trust_11trust_12trust_13neighbor_familiaritypublic_service_1public_service_2public_service_3public_service_4public_service_5public_service_6public_service_7public_service_8public_service_9
01411232592015/8/4 14:181195911111NaN4.0-2.0200001NaN45.0010000000NaN17615532552.04255431431234145412433.03.023333113.030.01.02.0111260000.02212010000000NaN100.031984.01958.01984.06.01.05.040000.05.0NaNNaN-2441-2411324350000.042-8-8532343-84145060505030.030505050
12421852852015/7/21 15:041199211112NaN4.02013.0200001NaN110.0000010000NaN17011054311.02213512343543234512436.02.013648513.02.01.03.0111140000.03412010000000NaN00NaN1NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN19723121973312114250000.0544353332333239070708085.070906060
234229831262015/7/21 13:24219671034NaN4.0-2.020001NaN120.0011000000NaN16012244511.02225131443544235553422.05.02454632NaNNaNNaNNaN11228000.03312010000000NaN021.031990.01968.01990.03.01.01.06000.03.0NaNNaN-2112-2112214280000.03333433333-83149080757980.090909075
34521028512015/7/25 17:33219431113NaN4.01959.064201NaN78.0000100000NaN16317044412.02115111524545115552441.06.01455724NaNNaNNaNNaN222212000.03311010000000NaN140.071960.0NaNNaNNaNNaNNaNNaNNaNNaNNaN-21412-2112213210000.03343533543332310090708080.090908080
4541718362015/8/10 9:502199411112NaN1.02014.0-12NaN70.0000010000NaN16511055323.01342553332443525514347.05.03211146NaNNaNNaNNaN1222-2.04311010000000NaN00NaN1NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1970611019724115323-8200000.0433355343333225050505050.050505050
# Check for missing values: per-column non-null counts and dtypes.
train.info(verbose=True,show_counts=True)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8000 entries, 0 to 7999
Data columns (total 140 columns):
 #    Column                Non-Null Count  Dtype  
---   ------                --------------  -----  
 0    id                    8000 non-null   int64  
 1    happiness             8000 non-null   int64  
 2    survey_type           8000 non-null   int64  
 3    province              8000 non-null   int64  
 4    city                  8000 non-null   int64  
 5    county                8000 non-null   int64  
 6    survey_time           8000 non-null   object 
 7    gender                8000 non-null   int64  
 8    birth                 8000 non-null   int64  
 9    nationality           8000 non-null   int64  
 10   religion              8000 non-null   int64  
 11   religion_freq         8000 non-null   int64  
 12   edu                   8000 non-null   int64  
 13   edu_other             3 non-null      object 
 14   edu_status            6880 non-null   float64
 15   edu_yr                6028 non-null   float64
 16   income                8000 non-null   int64  
 17   political             8000 non-null   int64  
 18   join_party            824 non-null    float64
 19   floor_area            8000 non-null   float64
 20   property_0            8000 non-null   int64  
 21   property_1            8000 non-null   int64  
 22   property_2            8000 non-null   int64  
 23   property_3            8000 non-null   int64  
 24   property_4            8000 non-null   int64  
 25   property_5            8000 non-null   int64  
 26   property_6            8000 non-null   int64  
 27   property_7            8000 non-null   int64  
 28   property_8            8000 non-null   int64  
 29   property_other        66 non-null     object 
 30   height_cm             8000 non-null   int64  
 31   weight_jin            8000 non-null   int64  
 32   health                8000 non-null   int64  
 33   health_problem        8000 non-null   int64  
 34   depression            8000 non-null   int64  
 35   hukou                 8000 non-null   int64  
 36   hukou_loc             7996 non-null   float64
 37   media_1               8000 non-null   int64  
 38   media_2               8000 non-null   int64  
 39   media_3               8000 non-null   int64  
 40   media_4               8000 non-null   int64  
 41   media_5               8000 non-null   int64  
 42   media_6               8000 non-null   int64  
 43   leisure_1             8000 non-null   int64  
 44   leisure_2             8000 non-null   int64  
 45   leisure_3             8000 non-null   int64  
 46   leisure_4             8000 non-null   int64  
 47   leisure_5             8000 non-null   int64  
 48   leisure_6             8000 non-null   int64  
 49   leisure_7             8000 non-null   int64  
 50   leisure_8             8000 non-null   int64  
 51   leisure_9             8000 non-null   int64  
 52   leisure_10            8000 non-null   int64  
 53   leisure_11            8000 non-null   int64  
 54   leisure_12            8000 non-null   int64  
 55   socialize             8000 non-null   int64  
 56   relax                 8000 non-null   int64  
 57   learn                 8000 non-null   int64  
 58   social_neighbor       7204 non-null   float64
 59   social_friend         7204 non-null   float64
 60   socia_outing          8000 non-null   int64  
 61   equity                8000 non-null   int64  
 62   class                 8000 non-null   int64  
 63   class_10_before       8000 non-null   int64  
 64   class_10_after        8000 non-null   int64  
 65   class_14              8000 non-null   int64  
 66   work_exper            8000 non-null   int64  
 67   work_status           2951 non-null   float64
 68   work_yr               2951 non-null   float64
 69   work_type             2951 non-null   float64
 70   work_manage           2951 non-null   float64
 71   insur_1               8000 non-null   int64  
 72   insur_2               8000 non-null   int64  
 73   insur_3               8000 non-null   int64  
 74   insur_4               8000 non-null   int64  
 75   family_income         7999 non-null   float64
 76   family_m              8000 non-null   int64  
 77   family_status         8000 non-null   int64  
 78   house                 8000 non-null   int64  
 79   car                   8000 non-null   int64  
 80   invest_0              8000 non-null   int64  
 81   invest_1              8000 non-null   int64  
 82   invest_2              8000 non-null   int64  
 83   invest_3              8000 non-null   int64  
 84   invest_4              8000 non-null   int64  
 85   invest_5              8000 non-null   int64  
 86   invest_6              8000 non-null   int64  
 87   invest_7              8000 non-null   int64  
 88   invest_8              8000 non-null   int64  
 89   invest_other          29 non-null     object 
 90   son                   8000 non-null   int64  
 91   daughter              8000 non-null   int64  
 92   minor_child           6934 non-null   float64
 93   marital               8000 non-null   int64  
 94   marital_1st           7172 non-null   float64
 95   s_birth               6282 non-null   float64
 96   marital_now           6230 non-null   float64
 97   s_edu                 6282 non-null   float64
 98   s_political           6282 non-null   float64
 99   s_hukou               6282 non-null   float64
 100  s_income              6282 non-null   float64
 101  s_work_exper          6282 non-null   float64
 102  s_work_status         2565 non-null   float64
 103  s_work_type           2565 non-null   float64
 104  f_birth               8000 non-null   int64  
 105  f_edu                 8000 non-null   int64  
 106  f_political           8000 non-null   int64  
 107  f_work_14             8000 non-null   int64  
 108  m_birth               8000 non-null   int64  
 109  m_edu                 8000 non-null   int64  
 110  m_political           8000 non-null   int64  
 111  m_work_14             8000 non-null   int64  
 112  status_peer           8000 non-null   int64  
 113  status_3_before       8000 non-null   int64  
 114  view                  8000 non-null   int64  
 115  inc_ability           8000 non-null   int64  
 116  inc_exp               8000 non-null   float64
 117  trust_1               8000 non-null   int64  
 118  trust_2               8000 non-null   int64  
 119  trust_3               8000 non-null   int64  
 120  trust_4               8000 non-null   int64  
 121  trust_5               8000 non-null   int64  
 122  trust_6               8000 non-null   int64  
 123  trust_7               8000 non-null   int64  
 124  trust_8               8000 non-null   int64  
 125  trust_9               8000 non-null   int64  
 126  trust_10              8000 non-null   int64  
 127  trust_11              8000 non-null   int64  
 128  trust_12              8000 non-null   int64  
 129  trust_13              8000 non-null   int64  
 130  neighbor_familiarity  8000 non-null   int64  
 131  public_service_1      8000 non-null   int64  
 132  public_service_2      8000 non-null   int64  
 133  public_service_3      8000 non-null   int64  
 134  public_service_4      8000 non-null   int64  
 135  public_service_5      8000 non-null   float64
 136  public_service_6      8000 non-null   int64  
 137  public_service_7      8000 non-null   int64  
 138  public_service_8      8000 non-null   int64  
 139  public_service_9      8000 non-null   int64  
dtypes: float64(25), int64(111), object(4)
memory usage: 8.5+ MB
# Inspect the label distribution; note the invalid code -8 appears 12 times.
y_train_=train["happiness"]
y_train_.value_counts()
happiness
 4    4818
 5    1410
 3    1159
 2     497
 1     104
-8      12
Name: count, dtype: int64
# Recode the invalid label -8 to the neutral class 3, then shift labels so
# they start at 0 (1..5 -> 0..4).
y_train_ = y_train_.replace(-8, 3) - 1
# Stack train and test so feature engineering below is applied to both at once.
data = pd.concat([train, test], axis=0, ignore_index=True)
# Combined size.
data.shape
(10968, 140)

4.数据处理

# Parse the survey timestamp and derive calendar features from it.
data['survey_time'] = pd.to_datetime(data['survey_time'], format='%Y/%m/%d %H:%M')
_dt = data['survey_time'].dt
# NOTE: the creation order fixes the feature-column order used later.
data["weekday"] = _dt.weekday
data["year"] = _dt.year
data["quarter"] = _dt.quarter
data["hour"] = _dt.hour
data["month"] = _dt.month
#把一天的时间分段
def hour_cut(x):
    if 0<=x<6:
        return 0
    elif  6<=x<8:
        return 1
    elif  8<=x<12:
        return 2
    elif  12<=x<14:
        return 3
    elif  14<=x<18:
        return 4
    elif  18<=x<21:
        return 5
    elif  21<=x<24:
        return 6

    
# Daypart bucket for the interview hour.
data["hour_cut"] = data["hour"].map(hour_cut)
# Respondent's age at survey time.
data["survey_age"] = data["year"] - data["birth"]
# Shift the label to start at 0 (it is dropped just below; y_train_ keeps the target).
data["happiness"] = data["happiness"].map(lambda v: v - 1)
# Drop the free-text column, the label, and the already-expanded timestamp.
data = data.drop(["edu_other", "happiness", "survey_time"], axis=1)
# Party-membership flag: 1 when a join year was recorded, 0 when missing.
data["join_party"] = data["join_party"].map(lambda v: 0 if pd.isnull(v) else 1)
#出生的年代
def birth_split(x):
    if 1920<=x<=1930:
        return 0
    elif  1930<x<=1940:
        return 1
    elif  1940<x<=1950:
        return 2
    elif  1950<x<=1960:
        return 3
    elif  1960<x<=1970:
        return 4
    elif  1970<x<=1980:
        return 5
    elif  1980<x<=1990:
        return 6
    elif  1990<x<=2000:
        return 7
    
# Decade-of-birth bucket feature.
data["birth_s"]=data["birth"].map(birth_split)
#收入分组
def income_cut(x):
    if x<0:
        return 0
    elif  0<=x<1200:
        return 1
    elif  1200<x<=10000:
        return 2
    elif  10000<x<24000:
        return 3
    elif  24000<x<40000:
        return 4
    elif  40000<=x:
        return 5
 

    
# Income bucket feature.
data["income_cut"] = data["income"].map(income_cut)

# Free-text columns become binary "was filled in" indicators.
data["property_other"] = data["property_other"].map(lambda v: 0 if pd.isnull(v) else 1)
data["invest_other"] = data["invest_other"].map(lambda v: 0 if pd.isnull(v) else 1)

# Impute missing values. Most NaNs are structural (question not applicable):
# spouse/work fields default to 0, ordinal scales get an out-of-range code.
_fill_values = {
    "edu_status": 5, "edu_yr": -2, "hukou_loc": 1,
    "social_neighbor": 8, "social_friend": 8,
    "work_status": 0, "work_yr": 0, "work_type": 0, "work_manage": 0,
    "family_income": -2,
    "minor_child": 0, "marital_1st": 0, "s_birth": 0, "marital_now": 0,
    "s_edu": 0, "s_political": 0, "s_hukou": 0, "s_income": 0,
    "s_work_exper": 0, "s_work_status": 0, "s_work_type": 0,
}
data = data.fillna(_fill_values)

# The id column is an identifier, not a feature.
data = data.drop(["id"], axis=1)
# Split back into train / test feature frames (train rows come first in data).
X_train_ = data[:train.shape[0]]
X_test_ = data[train.shape[0]:]
target_column = 'happiness'
feature_columns = list(X_test_.columns)
feature_columns
['survey_type',
 'province',
 'city',
 'county',
 'gender',
 'birth',
 'nationality',
 'religion',
 'religion_freq',
 'edu',
 'edu_status',
 'edu_yr',
 'income',
 'political',
 'join_party',
 'floor_area',
 'property_0',
 'property_1',
 'property_2',
 'property_3',
 'property_4',
 'property_5',
 'property_6',
 'property_7',
 'property_8',
 'property_other',
 'height_cm',
 'weight_jin',
 'health',
 'health_problem',
 'depression',
 'hukou',
 'hukou_loc',
 'media_1',
 'media_2',
 'media_3',
 'media_4',
 'media_5',
 'media_6',
 'leisure_1',
 'leisure_2',
 'leisure_3',
 'leisure_4',
 'leisure_5',
 'leisure_6',
 'leisure_7',
 'leisure_8',
 'leisure_9',
 'leisure_10',
 'leisure_11',
 'leisure_12',
 'socialize',
 'relax',
 'learn',
 'social_neighbor',
 'social_friend',
 'socia_outing',
 'equity',
 'class',
 'class_10_before',
 'class_10_after',
 'class_14',
 'work_exper',
 'work_status',
 'work_yr',
 'work_type',
 'work_manage',
 'insur_1',
 'insur_2',
 'insur_3',
 'insur_4',
 'family_income',
 'family_m',
 'family_status',
 'house',
 'car',
 'invest_0',
 'invest_1',
 'invest_2',
 'invest_3',
 'invest_4',
 'invest_5',
 'invest_6',
 'invest_7',
 'invest_8',
 'invest_other',
 'son',
 'daughter',
 'minor_child',
 'marital',
 'marital_1st',
 's_birth',
 'marital_now',
 's_edu',
 's_political',
 's_hukou',
 's_income',
 's_work_exper',
 's_work_status',
 's_work_type',
 'f_birth',
 'f_edu',
 'f_political',
 'f_work_14',
 'm_birth',
 'm_edu',
 'm_political',
 'm_work_14',
 'status_peer',
 'status_3_before',
 'view',
 'inc_ability',
 'inc_exp',
 'trust_1',
 'trust_2',
 'trust_3',
 'trust_4',
 'trust_5',
 'trust_6',
 'trust_7',
 'trust_8',
 'trust_9',
 'trust_10',
 'trust_11',
 'trust_12',
 'trust_13',
 'neighbor_familiarity',
 'public_service_1',
 'public_service_2',
 'public_service_3',
 'public_service_4',
 'public_service_5',
 'public_service_6',
 'public_service_7',
 'public_service_8',
 'public_service_9',
 'weekday',
 'year',
 'quarter',
 'hour',
 'month',
 'hour_cut',
 'survey_age',
 'birth_s',
 'income_cut']
# Convert the feature frames and label series to plain NumPy arrays for the boosters.
X_train = X_train_.to_numpy()
y_train = y_train_.to_numpy()
X_test = X_test_.to_numpy()
X_train.shape
(8000, 145)
# 1-D label vector: one entry per training row.
y_train.shape
(8000,)
# Test feature matrix has the same 145 columns as the training matrix.
X_test.shape
(2968, 145)
# Custom evaluation metric for xgb.train: plain MSE of the raw predictions.
def myFeval(preds, xgbtrain):
    """Return ('myFeval', mse) for an xgboost (predictions, DMatrix) pair."""
    y_true = xgbtrain.get_label()
    return 'myFeval', mean_squared_error(y_true, preds)
##### xgb

xgb_params = {'booster': 'gbtree',
              'eta': 0.005,
              'max_depth': 5,
              'subsample': 0.7,
              'colsample_bytree': 0.8,
              # Fix: 'reg:linear' is deprecated (see the runtime warning) and is
              # only an alias of the squared-error objective; use the canonical
              # name. Training behaviour is identical.
              'objective': 'reg:squarederror',
              'eval_metric': 'rmse',
              'nthread': 8}

# 5-fold out-of-fold training: OOF predictions give an honest CV score, test
# predictions are averaged over the fold models.
folds = KFold(n_splits=5, shuffle=True, random_state=2018)
oof_xgb = np.zeros(len(train))
predictions_xgb = np.zeros(len(test))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train, y_train)):
    print("fold n°{}".format(fold_ + 1))
    trn_data = xgb.DMatrix(X_train[trn_idx], y_train[trn_idx])
    val_data = xgb.DMatrix(X_train[val_idx], y_train[val_idx])

    watchlist = [(trn_data, 'train'), (val_data, 'valid_data')]
    # NOTE(review): `feval` is deprecated in recent xgboost in favour of
    # `custom_metric`; kept as-is because their behaviour only differs when a
    # custom objective is also supplied, which is not the case here.
    clf = xgb.train(dtrain=trn_data, num_boost_round=20000, evals=watchlist,
                    early_stopping_rounds=200, verbose_eval=100,
                    params=xgb_params, feval=myFeval)
    oof_xgb[val_idx] = clf.predict(xgb.DMatrix(X_train[val_idx]))
    predictions_xgb += clf.predict(xgb.DMatrix(X_test)) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_xgb, y_train_)))
fold n°1
[0]	train-rmse:0.82393	train-myFeval:0.67886	valid_data-rmse:0.79253	valid_data-myFeval:0.62810


D:\anaconda3\Lib\site-packages\xgboost\training.py:38: UserWarning: `feval` is deprecated, use `custom_metric` instead.  They have different behavior when custom objective is also used.See https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html for details on the `custom_metric`.
  warnings.warn(
D:\anaconda3\Lib\site-packages\xgboost\core.py:160: UserWarning: [11:59:24] WARNING: C:\b\abs_0fh_d4x2ng\croot\xgboost-split_1713973188995\work\cpp_src\src\objective\regression_obj.cu:209: reg:linear is now deprecated in favor of reg:squarederror.
  warnings.warn(smsg, UserWarning)


[100]	train-rmse:0.74464	train-myFeval:0.55449	valid_data-rmse:0.73502	valid_data-myFeval:0.54025
[200]	train-rmse:0.69770	train-myFeval:0.48679	valid_data-rmse:0.70656	valid_data-myFeval:0.49923
[300]	train-rmse:0.66722	train-myFeval:0.44518	valid_data-rmse:0.69159	valid_data-myFeval:0.47830
[400]	train-rmse:0.64466	train-myFeval:0.41559	valid_data-rmse:0.68242	valid_data-myFeval:0.46570
[500]	train-rmse:0.62700	train-myFeval:0.39312	valid_data-rmse:0.67782	valid_data-myFeval:0.45944
[600]	train-rmse:0.61203	train-myFeval:0.37459	valid_data-rmse:0.67388	valid_data-myFeval:0.45411
[700]	train-rmse:0.59866	train-myFeval:0.35840	valid_data-rmse:0.67140	valid_data-myFeval:0.45078
[800]	train-rmse:0.58712	train-myFeval:0.34471	valid_data-rmse:0.66950	valid_data-myFeval:0.44823
[900]	train-rmse:0.57628	train-myFeval:0.33210	valid_data-rmse:0.66794	valid_data-myFeval:0.44614
[1000]	train-rmse:0.56658	train-myFeval:0.32101	valid_data-rmse:0.66699	valid_data-myFeval:0.44487
[1100]	train-rmse:0.55719	train-myFeval:0.31046	valid_data-rmse:0.66613	valid_data-myFeval:0.44373
[1200]	train-rmse:0.54873	train-myFeval:0.30110	valid_data-rmse:0.66540	valid_data-myFeval:0.44275
[1300]	train-rmse:0.54048	train-myFeval:0.29212	valid_data-rmse:0.66490	valid_data-myFeval:0.44209
[1400]	train-rmse:0.53260	train-myFeval:0.28366	valid_data-rmse:0.66446	valid_data-myFeval:0.44150
[1500]	train-rmse:0.52540	train-myFeval:0.27605	valid_data-rmse:0.66400	valid_data-myFeval:0.44089
[1600]	train-rmse:0.51833	train-myFeval:0.26866	valid_data-rmse:0.66383	valid_data-myFeval:0.44067
[1700]	train-rmse:0.51128	train-myFeval:0.26141	valid_data-rmse:0.66348	valid_data-myFeval:0.44020
[1800]	train-rmse:0.50453	train-myFeval:0.25455	valid_data-rmse:0.66317	valid_data-myFeval:0.43979
[1900]	train-rmse:0.49817	train-myFeval:0.24817	valid_data-rmse:0.66318	valid_data-myFeval:0.43981
[2000]	train-rmse:0.49195	train-myFeval:0.24201	valid_data-rmse:0.66303	valid_data-myFeval:0.43961
[2100]	train-rmse:0.48594	train-myFeval:0.23614	valid_data-rmse:0.66296	valid_data-myFeval:0.43952
[2200]	train-rmse:0.48036	train-myFeval:0.23075	valid_data-rmse:0.66295	valid_data-myFeval:0.43951
[2300]	train-rmse:0.47487	train-myFeval:0.22550	valid_data-rmse:0.66298	valid_data-myFeval:0.43955
[2318]	train-rmse:0.47389	train-myFeval:0.22457	valid_data-rmse:0.66292	valid_data-myFeval:0.43946
fold n°2
[0]	train-rmse:0.81393	train-myFeval:0.66248	valid_data-rmse:0.83294	valid_data-myFeval:0.69378


D:\anaconda3\Lib\site-packages\xgboost\training.py:38: UserWarning: `feval` is deprecated, use `custom_metric` instead.  They have different behavior when custom objective is also used.See https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html for details on the `custom_metric`.
  warnings.warn(
D:\anaconda3\Lib\site-packages\xgboost\core.py:160: UserWarning: [11:59:50] WARNING: C:\b\abs_0fh_d4x2ng\croot\xgboost-split_1713973188995\work\cpp_src\src\objective\regression_obj.cu:209: reg:linear is now deprecated in favor of reg:squarederror.
  warnings.warn(smsg, UserWarning)


[100]	train-rmse:0.73830	train-myFeval:0.54508	valid_data-rmse:0.76603	valid_data-myFeval:0.58681
[200]	train-rmse:0.69270	train-myFeval:0.47983	valid_data-rmse:0.73140	valid_data-myFeval:0.53495
[300]	train-rmse:0.66250	train-myFeval:0.43891	valid_data-rmse:0.71361	valid_data-myFeval:0.50923
[400]	train-rmse:0.64021	train-myFeval:0.40987	valid_data-rmse:0.70267	valid_data-myFeval:0.49374
[500]	train-rmse:0.62234	train-myFeval:0.38731	valid_data-rmse:0.69585	valid_data-myFeval:0.48420
[600]	train-rmse:0.60709	train-myFeval:0.36855	valid_data-rmse:0.69103	valid_data-myFeval:0.47752
[700]	train-rmse:0.59430	train-myFeval:0.35319	valid_data-rmse:0.68759	valid_data-myFeval:0.47278
[800]	train-rmse:0.58285	train-myFeval:0.33972	valid_data-rmse:0.68556	valid_data-myFeval:0.46999
[900]	train-rmse:0.57242	train-myFeval:0.32767	valid_data-rmse:0.68383	valid_data-myFeval:0.46763
[1000]	train-rmse:0.56302	train-myFeval:0.31699	valid_data-rmse:0.68251	valid_data-myFeval:0.46582
[1100]	train-rmse:0.55466	train-myFeval:0.30765	valid_data-rmse:0.68151	valid_data-myFeval:0.46446
[1200]	train-rmse:0.54632	train-myFeval:0.29847	valid_data-rmse:0.68079	valid_data-myFeval:0.46347
[1300]	train-rmse:0.53884	train-myFeval:0.29035	valid_data-rmse:0.68027	valid_data-myFeval:0.46277
[1400]	train-rmse:0.53151	train-myFeval:0.28250	valid_data-rmse:0.67981	valid_data-myFeval:0.46214
[1500]	train-rmse:0.52466	train-myFeval:0.27526	valid_data-rmse:0.67950	valid_data-myFeval:0.46171
[1600]	train-rmse:0.51782	train-myFeval:0.26813	valid_data-rmse:0.67900	valid_data-myFeval:0.46104
[1700]	train-rmse:0.51134	train-myFeval:0.26147	valid_data-rmse:0.67886	valid_data-myFeval:0.46085
[1800]	train-rmse:0.50494	train-myFeval:0.25496	valid_data-rmse:0.67881	valid_data-myFeval:0.46078
[1900]	train-rmse:0.49861	train-myFeval:0.24862	valid_data-rmse:0.67856	valid_data-myFeval:0.46044
[2000]	train-rmse:0.49241	train-myFeval:0.24247	valid_data-rmse:0.67838	valid_data-myFeval:0.46019
[2100]	train-rmse:0.48635	train-myFeval:0.23653	valid_data-rmse:0.67825	valid_data-myFeval:0.46002
[2200]	train-rmse:0.48057	train-myFeval:0.23094	valid_data-rmse:0.67806	valid_data-myFeval:0.45976
[2300]	train-rmse:0.47491	train-myFeval:0.22554	valid_data-rmse:0.67805	valid_data-myFeval:0.45975
[2400]	train-rmse:0.46918	train-myFeval:0.22013	valid_data-rmse:0.67793	valid_data-myFeval:0.45959
[2500]	train-rmse:0.46408	train-myFeval:0.21537	valid_data-rmse:0.67802	valid_data-myFeval:0.45971
[2566]	train-rmse:0.46042	train-myFeval:0.21199	valid_data-rmse:0.67809	valid_data-myFeval:0.45980
fold n°3
[0]	train-rmse:0.81545	train-myFeval:0.66495	valid_data-rmse:0.82699	valid_data-myFeval:0.68391


D:\anaconda3\Lib\site-packages\xgboost\training.py:38: UserWarning: `feval` is deprecated, use `custom_metric` instead.  They have different behavior when custom objective is also used.See https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html for details on the `custom_metric`.
  warnings.warn(
D:\anaconda3\Lib\site-packages\xgboost\core.py:160: UserWarning: [12:00:19] WARNING: C:\b\abs_0fh_d4x2ng\croot\xgboost-split_1713973188995\work\cpp_src\src\objective\regression_obj.cu:209: reg:linear is now deprecated in favor of reg:squarederror.
  warnings.warn(smsg, UserWarning)


[100]	train-rmse:0.73843	train-myFeval:0.54527	valid_data-rmse:0.76373	valid_data-myFeval:0.58328
[200]	train-rmse:0.69179	train-myFeval:0.47858	valid_data-rmse:0.73156	valid_data-myFeval:0.53517
[300]	train-rmse:0.66140	train-myFeval:0.43745	valid_data-rmse:0.71477	valid_data-myFeval:0.51090
[400]	train-rmse:0.63884	train-myFeval:0.40812	valid_data-rmse:0.70460	valid_data-myFeval:0.49647
[500]	train-rmse:0.62062	train-myFeval:0.38517	valid_data-rmse:0.69778	valid_data-myFeval:0.48690
[600]	train-rmse:0.60568	train-myFeval:0.36685	valid_data-rmse:0.69337	valid_data-myFeval:0.48076
[700]	train-rmse:0.59328	train-myFeval:0.35199	valid_data-rmse:0.69052	valid_data-myFeval:0.47682
[800]	train-rmse:0.58212	train-myFeval:0.33886	valid_data-rmse:0.68814	valid_data-myFeval:0.47354
[900]	train-rmse:0.57236	train-myFeval:0.32760	valid_data-rmse:0.68634	valid_data-myFeval:0.47106
[1000]	train-rmse:0.56341	train-myFeval:0.31743	valid_data-rmse:0.68517	valid_data-myFeval:0.46945
[1100]	train-rmse:0.55465	train-myFeval:0.30763	valid_data-rmse:0.68417	valid_data-myFeval:0.46809
[1200]	train-rmse:0.54672	train-myFeval:0.29890	valid_data-rmse:0.68318	valid_data-myFeval:0.46674
[1300]	train-rmse:0.53884	train-myFeval:0.29035	valid_data-rmse:0.68216	valid_data-myFeval:0.46534
[1400]	train-rmse:0.53153	train-myFeval:0.28252	valid_data-rmse:0.68167	valid_data-myFeval:0.46468
[1500]	train-rmse:0.52451	train-myFeval:0.27511	valid_data-rmse:0.68141	valid_data-myFeval:0.46432
[1600]	train-rmse:0.51747	train-myFeval:0.26777	valid_data-rmse:0.68077	valid_data-myFeval:0.46345
[1700]	train-rmse:0.51073	train-myFeval:0.26085	valid_data-rmse:0.68039	valid_data-myFeval:0.46294
[1800]	train-rmse:0.50427	train-myFeval:0.25428	valid_data-rmse:0.68006	valid_data-myFeval:0.46248
[1900]	train-rmse:0.49816	train-myFeval:0.24816	valid_data-rmse:0.67991	valid_data-myFeval:0.46228
[2000]	train-rmse:0.49182	train-myFeval:0.24189	valid_data-rmse:0.67962	valid_data-myFeval:0.46188
[2100]	train-rmse:0.48562	train-myFeval:0.23583	valid_data-rmse:0.67944	valid_data-myFeval:0.46163
[2200]	train-rmse:0.47992	train-myFeval:0.23033	valid_data-rmse:0.67926	valid_data-myFeval:0.46140
[2300]	train-rmse:0.47411	train-myFeval:0.22478	valid_data-rmse:0.67922	valid_data-myFeval:0.46135
[2358]	train-rmse:0.47054	train-myFeval:0.22141	valid_data-rmse:0.67929	valid_data-myFeval:0.46143
fold n°4
[0]	train-rmse:0.81419	train-myFeval:0.66291	valid_data-rmse:0.83171	valid_data-myFeval:0.69174


D:\anaconda3\Lib\site-packages\xgboost\training.py:38: UserWarning: `feval` is deprecated, use `custom_metric` instead.  They have different behavior when custom objective is also used.See https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html for details on the `custom_metric`.
  warnings.warn(
D:\anaconda3\Lib\site-packages\xgboost\core.py:160: UserWarning: [12:00:47] WARNING: C:\b\abs_0fh_d4x2ng\croot\xgboost-split_1713973188995\work\cpp_src\src\objective\regression_obj.cu:209: reg:linear is now deprecated in favor of reg:squarederror.
  warnings.warn(smsg, UserWarning)


[100]	train-rmse:0.73899	train-myFeval:0.54611	valid_data-rmse:0.76583	valid_data-myFeval:0.58649
[200]	train-rmse:0.69395	train-myFeval:0.48157	valid_data-rmse:0.73003	valid_data-myFeval:0.53294
[300]	train-rmse:0.66406	train-myFeval:0.44098	valid_data-rmse:0.71098	valid_data-myFeval:0.50549
[400]	train-rmse:0.64155	train-myFeval:0.41158	valid_data-rmse:0.69926	valid_data-myFeval:0.48896
[500]	train-rmse:0.62370	train-myFeval:0.38900	valid_data-rmse:0.69177	valid_data-myFeval:0.47854
[600]	train-rmse:0.60857	train-myFeval:0.37036	valid_data-rmse:0.68674	valid_data-myFeval:0.47161
[700]	train-rmse:0.59574	train-myFeval:0.35490	valid_data-rmse:0.68317	valid_data-myFeval:0.46672
[800]	train-rmse:0.58467	train-myFeval:0.34184	valid_data-rmse:0.68054	valid_data-myFeval:0.46314
[900]	train-rmse:0.57457	train-myFeval:0.33013	valid_data-rmse:0.67864	valid_data-myFeval:0.46055
[1000]	train-rmse:0.56524	train-myFeval:0.31949	valid_data-rmse:0.67731	valid_data-myFeval:0.45874
[1100]	train-rmse:0.55640	train-myFeval:0.30958	valid_data-rmse:0.67607	valid_data-myFeval:0.45707
[1200]	train-rmse:0.54859	train-myFeval:0.30095	valid_data-rmse:0.67567	valid_data-myFeval:0.45652
[1300]	train-rmse:0.54050	train-myFeval:0.29214	valid_data-rmse:0.67497	valid_data-myFeval:0.45558
[1400]	train-rmse:0.53326	train-myFeval:0.28437	valid_data-rmse:0.67459	valid_data-myFeval:0.45508
[1500]	train-rmse:0.52619	train-myFeval:0.27688	valid_data-rmse:0.67447	valid_data-myFeval:0.45491
[1600]	train-rmse:0.51927	train-myFeval:0.26964	valid_data-rmse:0.67424	valid_data-myFeval:0.45460
[1700]	train-rmse:0.51242	train-myFeval:0.26257	valid_data-rmse:0.67422	valid_data-myFeval:0.45457
[1800]	train-rmse:0.50599	train-myFeval:0.25602	valid_data-rmse:0.67378	valid_data-myFeval:0.45397
[1900]	train-rmse:0.49975	train-myFeval:0.24975	valid_data-rmse:0.67368	valid_data-myFeval:0.45384
[2000]	train-rmse:0.49370	train-myFeval:0.24374	valid_data-rmse:0.67376	valid_data-myFeval:0.45395
[2068]	train-rmse:0.48945	train-myFeval:0.23956	valid_data-rmse:0.67375	valid_data-myFeval:0.45393
fold n°5
[0]	train-rmse:0.82091	train-myFeval:0.67389	valid_data-rmse:0.80489	valid_data-myFeval:0.64785


D:\anaconda3\Lib\site-packages\xgboost\training.py:38: UserWarning: `feval` is deprecated, use `custom_metric` instead.  They have different behavior when custom objective is also used.See https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html for details on the `custom_metric`.
  warnings.warn(
D:\anaconda3\Lib\site-packages\xgboost\core.py:160: UserWarning: [12:01:12] WARNING: C:\b\abs_0fh_d4x2ng\croot\xgboost-split_1713973188995\work\cpp_src\src\objective\regression_obj.cu:209: reg:linear is now deprecated in favor of reg:squarederror.
  warnings.warn(smsg, UserWarning)


[100]	train-rmse:0.74096	train-myFeval:0.54903	valid_data-rmse:0.74931	valid_data-myFeval:0.56147
[200]	train-rmse:0.69341	train-myFeval:0.48081	valid_data-rmse:0.72129	valid_data-myFeval:0.52026
[300]	train-rmse:0.66212	train-myFeval:0.43841	valid_data-rmse:0.70749	valid_data-myFeval:0.50054
[400]	train-rmse:0.63931	train-myFeval:0.40871	valid_data-rmse:0.69945	valid_data-myFeval:0.48923
[500]	train-rmse:0.62114	train-myFeval:0.38582	valid_data-rmse:0.69414	valid_data-myFeval:0.48183
[600]	train-rmse:0.60610	train-myFeval:0.36736	valid_data-rmse:0.69065	valid_data-myFeval:0.47699
[700]	train-rmse:0.59310	train-myFeval:0.35177	valid_data-rmse:0.68823	valid_data-myFeval:0.47365
[800]	train-rmse:0.58151	train-myFeval:0.33816	valid_data-rmse:0.68631	valid_data-myFeval:0.47102
[900]	train-rmse:0.57079	train-myFeval:0.32580	valid_data-rmse:0.68496	valid_data-myFeval:0.46916
[1000]	train-rmse:0.56123	train-myFeval:0.31498	valid_data-rmse:0.68392	valid_data-myFeval:0.46774
[1100]	train-rmse:0.55244	train-myFeval:0.30519	valid_data-rmse:0.68290	valid_data-myFeval:0.46636
[1200]	train-rmse:0.54402	train-myFeval:0.29596	valid_data-rmse:0.68194	valid_data-myFeval:0.46504
[1300]	train-rmse:0.53623	train-myFeval:0.28754	valid_data-rmse:0.68147	valid_data-myFeval:0.46440
[1400]	train-rmse:0.52838	train-myFeval:0.27918	valid_data-rmse:0.68073	valid_data-myFeval:0.46339
[1500]	train-rmse:0.52094	train-myFeval:0.27138	valid_data-rmse:0.68026	valid_data-myFeval:0.46276
[1600]	train-rmse:0.51413	train-myFeval:0.26433	valid_data-rmse:0.67992	valid_data-myFeval:0.46229
[1700]	train-rmse:0.50725	train-myFeval:0.25731	valid_data-rmse:0.67938	valid_data-myFeval:0.46155
[1800]	train-rmse:0.50086	train-myFeval:0.25086	valid_data-rmse:0.67905	valid_data-myFeval:0.46111
[1900]	train-rmse:0.49443	train-myFeval:0.24446	valid_data-rmse:0.67880	valid_data-myFeval:0.46078
[2000]	train-rmse:0.48809	train-myFeval:0.23823	valid_data-rmse:0.67859	valid_data-myFeval:0.46048
[2100]	train-rmse:0.48215	train-myFeval:0.23247	valid_data-rmse:0.67830	valid_data-myFeval:0.46009
[2200]	train-rmse:0.47621	train-myFeval:0.22678	valid_data-rmse:0.67795	valid_data-myFeval:0.45962
[2300]	train-rmse:0.47052	train-myFeval:0.22139	valid_data-rmse:0.67806	valid_data-myFeval:0.45977
[2398]	train-rmse:0.46456	train-myFeval:0.21582	valid_data-rmse:0.67804	valid_data-myFeval:0.45974
CV score: 0.45487181
##### lgb

# LightGBM regression parameters: small trees plus strong bagging / feature
# subsampling to limit overfitting.
param = {'boosting_type': 'gbdt',
         'num_leaves': 20,
         'min_data_in_leaf': 20,
         'objective': 'regression',
         'max_depth': 6,
         'learning_rate': 0.01,
         "min_child_samples": 30,
         "feature_fraction": 0.8,
         "bagging_freq": 1,
         "bagging_fraction": 0.8,
         "bagging_seed": 11,
         "metric": 'mse',
         "lambda_l1": 0.1,
         "verbosity": -1}

# Same 5-fold OOF scheme (and fold seed) as the xgboost model above.
folds = KFold(n_splits=5, shuffle=True, random_state=2018)
oof_lgb = np.zeros(len(X_train_))
predictions_lgb = np.zeros(len(X_test_))

num_round = 10000
for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train, y_train)):
    print("fold n°{}".format(fold_ + 1))
    trn_data = lgb.Dataset(X_train[trn_idx], y_train[trn_idx])
    val_data = lgb.Dataset(X_train[val_idx], y_train[val_idx])

    clf = lgb.train(param, trn_data, num_round, valid_sets=[trn_data, val_data])
    oof_lgb[val_idx] = clf.predict(X_train[val_idx], num_iteration=clf.best_iteration)
    predictions_lgb += clf.predict(X_test, num_iteration=clf.best_iteration) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_lgb, y_train_)))
fold n°1
fold n°2
fold n°3
fold n°4
fold n°5
CV score: 0.47250842
#Install the catboost package
# Open a terminal (Linux/macOS) or Command Prompt / Anaconda Prompt (Windows)
# If you use pip, run:
# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple catboost
# If you use conda, run:
# conda install -c conda-forge catboost
from catboost import Pool, CatBoostRegressor
# cat_features=[0,2,3,10,11,13,15,16,17,18,19]
from sklearn.model_selection import train_test_split


#X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(X_train_, y_train_, test_size=0.3, random_state=2019)
# train_pool = Pool(X_train_s, y_train_s,cat_features=[0,2,3,10,11,13,15,16,17,18,19])
# val_pool = Pool(X_test_s, y_test_s,cat_features=[0,2,3,10,11,13,15,16,17,18,19])
# test_pool = Pool(X_test_ ,cat_features=[0,2,3,10,11,13,15,16,17,18,19]) 


# 5-fold CV for CatBoost: out-of-fold predictions for stacking plus
# fold-averaged test predictions.
kfolder = KFold(n_splits=5, shuffle=True, random_state=2019)
oof_cb = np.zeros(len(X_train_))
predictions_cb = np.zeros(len(X_test_))

# Hyperparameters are fold-invariant, so build the dict once instead of
# recreating it on every iteration (as the original did).
cb_params = {
     'n_estimators': 100000,      # generous cap; early stopping picks the real size
     'loss_function': 'RMSE',
     'eval_metric': 'RMSE',
     'learning_rate': 0.05,
     'depth': 5,
     'use_best_model': True,      # keep only trees up to the best eval iteration
     'subsample': 0.6,
     'bootstrap_type': 'Bernoulli',
     'reg_lambda': 3
}

# NOTE(review): the original split X_train_ but indexed X_train/y_train; the
# underscore variants are used consistently here so the fold indices always
# match the array being sliced.
for fold_, (train_index, vali_index) in enumerate(kfolder.split(X_train_, y_train_)):
    print("fold n°{}".format(fold_))
    k_x_train, k_y_train = X_train_[train_index], y_train_[train_index]
    k_x_vali, k_y_vali = X_train_[vali_index], y_train_[vali_index]

    model_cb = CatBoostRegressor(**cb_params)
    # Train with early stopping on this fold's validation slice.
    model_cb.fit(k_x_train, k_y_train, eval_set=[(k_x_vali, k_y_vali)],
                 verbose=100, early_stopping_rounds=50)
    oof_cb[vali_index] = model_cb.predict(k_x_vali, ntree_end=model_cb.best_iteration_)
    predictions_cb += model_cb.predict(X_test_, ntree_end=model_cb.best_iteration_) / kfolder.n_splits

# CV score = MSE of the out-of-fold predictions against the full target.
print("CV score: {:<8.8f}".format(mean_squared_error(oof_cb, y_train_)))
fold n°0
0:	learn: 0.8175871	test: 0.7820939	best: 0.7820939 (0)	total: 146ms	remaining: 4h 3m
100:	learn: 0.6711041	test: 0.6749289	best: 0.6749289 (100)	total: 731ms	remaining: 12m 2s
200:	learn: 0.6410910	test: 0.6688829	best: 0.6686703 (190)	total: 1.67s	remaining: 13m 47s
300:	learn: 0.6130819	test: 0.6669464	best: 0.6668201 (282)	total: 2.57s	remaining: 14m 10s
400:	learn: 0.5895197	test: 0.6666901	best: 0.6663658 (371)	total: 3.81s	remaining: 15m 45s
500:	learn: 0.5684832	test: 0.6657841	best: 0.6654600 (478)	total: 4.98s	remaining: 16m 29s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6654599993
bestIteration = 478

Shrink model to first 479 iterations.
fold n°1
0:	learn: 0.8107754	test: 0.8172376	best: 0.8172376 (0)	total: 7.99ms	remaining: 13m 18s
100:	learn: 0.6715406	test: 0.6800052	best: 0.6800052 (100)	total: 1.49s	remaining: 24m 35s
200:	learn: 0.6428284	test: 0.6699391	best: 0.6699391 (200)	total: 2.69s	remaining: 22m 18s
300:	learn: 0.6144500	test: 0.6663790	best: 0.6662390 (298)	total: 3.94s	remaining: 21m 46s
400:	learn: 0.5905343	test: 0.6643743	best: 0.6641256 (388)	total: 5.16s	remaining: 21m 21s
500:	learn: 0.5703917	test: 0.6632232	best: 0.6632137 (497)	total: 6.27s	remaining: 20m 45s
600:	learn: 0.5523517	test: 0.6626011	best: 0.6620170 (579)	total: 7.44s	remaining: 20m 30s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6620170222
bestIteration = 579

Shrink model to first 580 iterations.
fold n°2
0:	learn: 0.8046145	test: 0.8370989	best: 0.8370989 (0)	total: 39.5ms	remaining: 1h 5m 47s
100:	learn: 0.6652528	test: 0.7059731	best: 0.7059731 (100)	total: 1.26s	remaining: 20m 45s
200:	learn: 0.6356395	test: 0.6958527	best: 0.6958527 (200)	total: 2.57s	remaining: 21m 17s
300:	learn: 0.6079444	test: 0.6913800	best: 0.6913800 (300)	total: 3.91s	remaining: 21m 36s
400:	learn: 0.5848883	test: 0.6900293	best: 0.6900293 (400)	total: 5.16s	remaining: 21m 20s
500:	learn: 0.5637398	test: 0.6896119	best: 0.6889243 (455)	total: 6.35s	remaining: 21m 1s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6889243403
bestIteration = 455

Shrink model to first 456 iterations.
fold n°3
0:	learn: 0.8156897	test: 0.7928103	best: 0.7928103 (0)	total: 7.7ms	remaining: 12m 50s
100:	learn: 0.6666901	test: 0.6886018	best: 0.6886018 (100)	total: 1.27s	remaining: 20m 59s
200:	learn: 0.6349422	test: 0.6834388	best: 0.6834388 (200)	total: 2.61s	remaining: 21m 35s
300:	learn: 0.6054434	test: 0.6814056	best: 0.6806466 (259)	total: 3.98s	remaining: 22m
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.680646584
bestIteration = 259

Shrink model to first 260 iterations.
fold n°4
0:	learn: 0.8073054	test: 0.8273646	best: 0.8273646 (0)	total: 7.88ms	remaining: 13m 7s
100:	learn: 0.6617636	test: 0.7072268	best: 0.7072268 (100)	total: 1.46s	remaining: 24m 1s
200:	learn: 0.6326520	test: 0.6986823	best: 0.6985780 (193)	total: 2.87s	remaining: 23m 46s
300:	learn: 0.6047984	test: 0.6949317	best: 0.6949112 (296)	total: 4.16s	remaining: 22m 56s
400:	learn: 0.5809457	test: 0.6927416	best: 0.6925554 (375)	total: 5.45s	remaining: 22m 32s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6925554216
bestIteration = 375

Shrink model to first 376 iterations.
CV score: 0.45983020
from sklearn import linear_model
# Stack the out-of-fold predictions of lgb / xgb / catboost as meta-features
# (one column per base model) and fit a Bayesian-ridge meta-model on top.
train_stack = np.vstack([oof_lgb, oof_xgb, oof_cb]).transpose()
test_stack = np.vstack([predictions_lgb, predictions_xgb, predictions_cb]).transpose()


folds_stack = RepeatedKFold(n_splits=5, n_repeats=2, random_state=2018)
oof_stack = np.zeros(train_stack.shape[0])
predictions = np.zeros(test_stack.shape[0])

# NOTE(review): with RepeatedKFold each sample is in a validation fold once
# per repeat, so later repeats overwrite earlier oof_stack entries — confirm
# this is the intended behavior (it matches the original code).
for fold_, (trn_idx, val_idx) in enumerate(folds_stack.split(train_stack, y_train_)):
    print("fold {}".format(fold_))
    trn_data, trn_y = train_stack[trn_idx], y_train_[trn_idx]
    val_data = train_stack[val_idx]

    clf_3 = linear_model.BayesianRidge()
    clf_3.fit(trn_data, trn_y)

    oof_stack[val_idx] = clf_3.predict(val_data)
    # Average over all n_splits * n_repeats fits; derive the divisor from the
    # splitter instead of hard-coding 10 so it tracks the CV configuration.
    predictions += clf_3.predict(test_stack) / folds_stack.get_n_splits()

print("CV score: {:<8.8f}".format(mean_squared_error(oof_stack, y_train_)))
fold 0
fold 1
fold 2
fold 3
fold 4
fold 5
fold 6
fold 7
fold 8
fold 9
CV score: 0.45369897
# Shift each stacked prediction up by one (presumably the model was trained on
# happiness - 1 — confirm against the target preprocessing) and write the
# submission file.
test_sub["happiness"] = [pred + 1 for pred in predictions]
test_sub.to_csv("submit_20240502.csv", index=False)
# 绘图案例 an example of matplotlib
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import jn
from IPython.display import display, clear_output
import time
x = np.linspace(0,5)
f, ax = plt.subplots()
ax.set_title("Bessel functions")

for n in range(1,10):
    time.sleep(1)
    ax.plot(x, jn(x,n))
    clear_output(wait=True)
    display(f)

# close the figure at the end, so we don't get a duplicate
# of the last plot
plt.close()

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

腾飞开源

你的鼓励将是我创作的最大动力!

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值