前言
关于一部分算法实践的代码整理
关于算法实践的几个关键步骤
载入python相关的包
# 基础
import pandas as pd
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
import gc
# import sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25, random_state = 0)
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error as mse
from lightgbm.sklearn import LGBMRegressor
# import boosting algorithm
import lightgbm as lgb
import xgboost as xgb
import catboost as ctb
载入数据
# Load the dataset from a CSV file into a DataFrame.
# NOTE(review): `filepath` is not defined in the visible part of this file;
# it must be assigned before this snippet runs.
df = pd.read_csv(filepath)
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit an extra "None" line.
df.info()
数据清洗
0、数据描述
# Summary statistics of `trainset`, rounded to two decimals for readability.
# The previous per-column lambda called isinstance(x, 'int'), which raises
# TypeError because the second argument is a string rather than a type.
# DataFrame.round rounds numeric columns and leaves non-numeric ones as-is.
# NOTE(review): `trainset` is not defined in the visible part of this file
# (the loading step above names its frame `df`) — confirm the intended frame.
trainset.describe().round(2)
1、查看特征空值情况
df_nulldata='\n'.join(['{} feature have: {} null data'.format(col,df[col].isnull().sum()) for col in df.column