# 导入模块:
import pandas as pd
import numpy as np #pandas依赖于numpy
from sklearn import preprocessing
import xgboost as xgb
# 常用功能简介:
# Load the training and test sets from CSV files in the working directory.
# index_col=0 tells pandas to use the first column of each file as the
# row index of the resulting DataFrame.
train, test = (
    pd.read_csv(csv_path, index_col=0) for csv_path in ('train.csv', 'test.csv')
)
#type(train)=pandas.core.frame.DataFrame(二维带标签的表格结构,可看作由列名映射到Series的类字典容器)
#train.head(n),获取train前n行的数据
#train.head(0),若n=0,返回只含列名、不含任何数据行的空DataFrame(并非整个train数据)
#train.tail(n),获取train后n行的数据
#train.describe(),获取train的统计信息,如下:
'''
Hazard T1_V1 T1_V2 T1_V3 T1_V10
count 50999.000000 50999.000000 50999.000000 50999.000000 50999.000000
mean 4.022785 9.722093 12.847585 3