数据介绍
https://www.kaggle.com/c/allstate-claims-severity
Kaggle比赛:保险赔偿预测
观察数据
导入库
#导入库
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression#逻辑回归模型
from sklearn.ensemble import RandomForestClassifier#随机森林模型
from sklearn.metrics import roc_auc_score as AUC#AUC统计
from sklearn.metrics import mean_absolute_error
from sklearn.decomposition import PCA#主成分分析
from sklearn.preprocessing import LabelEncoder,LabelBinarizer
from sklearn.model_selection import cross_val_score
import xgboost as xgb
from scipy import stats
import seaborn as sns
from copy import deepcopy
%matplotlib inline
读取数据
# Load the competition's training and test CSV files into DataFrames.
# NOTE(review): "E:Data\\..." has no backslash after the drive letter, so on
# Windows it is resolved relative to the current directory of drive E: rather
# than the drive root -- confirm this is intended.
train_csv_path = "E:Data\\kaggle_Allstate_Claims_Severity\\train.csv"
test_csv_path = "E:Data\\kaggle_Allstate_Claims_Severity\\test.csv"

train = pd.read_csv(train_csv_path)
test = pd.read_csv(test_csv_path)
训练集:188318行 × 132列
132个属性包括:1个id、116个类别属性、14个数值属性,以及最后1个目标属性loss(即需要预测的赔偿金额)