使用 AdaBoost 完成 Kaggle 的 Titanic 生存预测

这里测试了使用 AdaBoost(sklearn 的 AdaBoostClassifier)的效果,代码如下:

#coding=utf-8
import pandas as pd
from pandas import Series,DataFrame 
import random
import numpy as np
from datetime import date
import datetime as dt
from numpy import nan as NA
from sklearn.tree import DecisionTreeRegressor  
from sklearn.ensemble import RandomForestRegressor  
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor

import warnings
warnings.filterwarnings("ignore")

# Load the Kaggle Titanic training and test sets (first row is the header).
traindata = pd.read_csv("train.csv", header=0)
testdata = pd.read_csv("test.csv", header=0)

# Fill missing ages with a fixed value of 30.
# NOTE: this script previously used chained indexing (``df.Age[mask] = 30``).
# pandas may apply such an assignment to a temporary copy, and with
# copy-on-write enabled it never modifies the frame. Assigning the whole
# column (or using ``.loc``) always updates the DataFrame itself.
traindata["Age"] = traindata["Age"].fillna(30)
print(traindata.Age.describe())
testdata["Age"] = testdata["Age"].fillna(30)
print(testdata.Age.describe())


def _add_bucket_features(frame):
    """Add the derived ``Age1`` and ``Parch1`` columns to *frame* in place.

    ``Age1`` is 0 when ``Age < 12``, 2 when ``Age > 50`` and 1 otherwise.
    ``Parch1`` is 1 when ``Parch > 1`` and 0 otherwise.
    """
    # Age bucket: child / middle / older.
    frame["Age1"] = 1
    frame.loc[frame["Age"] < 12, "Age1"] = 0
    frame.loc[frame["Age"] > 50, "Age1"] = 2

    # Flag passengers whose Parch value is greater than 1.
    frame["Parch1"] = 0
    frame.loc[frame["Parch"] > 1, "Parch1"] = 1


_add_bucket_features(traindata)
_add_bucket_features(testdata)

print(traindata.describe())

# Encode Sex numerically: male -> 0, female -> 1.
# ``map`` yields a numeric column (the old in-place edits left a mixed
# object column), so the describe() calls below report numeric statistics.
# Any value other than 'male'/'female' becomes NaN.
_SEX_CODES = {"male": 0, "female": 1}
traindata["Sex"] = traindata["Sex"].map(_SEX_CODES)
print(traindata.Sex.describe())
testdata["Sex"] = testdata["Sex"].map(_SEX_CODES)
print(testdata.Sex.describe())

# Fill missing fares in the test set with a fixed value of 35.
testdata["Fare"] = testdata["Fare"].fillna(35)
print(testdata.Fare.describe())

from sklearn import preprocessing

# Columns fed to the model; the same list is used for train and test so the
# two feature matrices always have matching column order.
_FEATURE_COLUMNS = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Age1', 'Parch1']

# Training labels and training feature matrix.
UseFlag = traindata['Survived'].values
UseFeature = traindata[_FEATURE_COLUMNS].values

# Fit the standardizer on the training features only, then apply it.
# StandardScaler.transform returns a NEW array; the result was previously
# discarded, so no scaling was ever applied. Assign it back.
scaler = preprocessing.StandardScaler().fit(UseFeature)
UseFeature = scaler.transform(UseFeature)

# Scale the test features with the statistics learned from the training set.
TestFeature = testdata[_FEATURE_COLUMNS].values
TestFeature = scaler.transform(TestFeature)

from sklearn.ensemble import AdaBoostClassifier

# Train an AdaBoost classifier with n_estimators=100.
clf = AdaBoostClassifier(n_estimators=100)
clf.fit(UseFeature, UseFlag)

# Predict once on the test features (a second, identical predict call whose
# result was never used has been removed).
temp = clf.predict(TestFeature)

# Keep the raw predictions in 'tadaboost' and expose them as an integer
# 'Survived' column for the submission file.
testdata['tadaboost'] = temp
testdata['Survived'] = testdata['tadaboost'].astype(int)

# Select the two columns written to the submission file.
outdata = testdata[['PassengerId', 'Survived']]
# Write the submission CSV with a header row and without the index column.
outdata.to_csv("test_2018_2_26_adaboost.csv", index=False, header=True)

最终结果:提交到 Kaggle 后的得分为 0.74162
这里写图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值