Kaggle 竞赛:泰坦尼克数据

import pandas as pd


# Load the Kaggle Titanic training set.
titanic = pd.read_csv('/home/zengxl/Desktop/titanic/train.csv')

# Fill missing ages with the median of the Age column.
titanic['Age'] = titanic['Age'].fillna(titanic['Age'].median())

# Encode Sex as integers (male -> 0, female -> 1). Mapping the whole column at
# once produces a numeric column; the earlier masked .loc assignments wrote
# ints into a string column one value at a time, which never gave the column a
# numeric dtype. Any value outside the mapping becomes NaN.
titanic['Sex'] = titanic['Sex'].map({'male': 0, 'female': 1})

# Missing embarkation ports are filled with 'S' before encoding
# (S -> 0, C -> 1, Q -> 2).
titanic['Embarked'] = titanic['Embarked'].fillna('S').map({'S': 0, 'C': 1, 'Q': 2})

from sklearn.linear_model import LinearRegression
# sklearn.cross_validation was removed in scikit-learn 0.20; KFold now lives in
# sklearn.model_selection and takes n_splits instead of (n, n_folds).
from sklearn.model_selection import KFold

predictions = []
alg = LinearRegression()
predictors = ['Sex', 'Embarked', 'Age']

# Three unshuffled folds over the training rows. random_state is deliberately
# not passed: it only has an effect when shuffle=True, and newer scikit-learn
# releases raise a ValueError if it is set without shuffling.
kf = KFold(n_splits=3)
for train, test in kf.split(titanic):
    # Fit on this fold's training rows ...
    train_predictors = titanic[predictors].iloc[train, :]
    train_target = titanic['Survived'].iloc[train]
    alg.fit(train_predictors, train_target)
    # ... then predict the held-out rows and keep the result per fold.
    fold_predictions = alg.predict(titanic[predictors].iloc[test, :])
    predictions.append(fold_predictions)

import numpy as np

# Join the per-fold prediction arrays into one array, then binarise it:
# scores above 0.5 become 1, scores at or below 0.5 become 0.
stacked = np.concatenate(predictions, axis=0)
above_cutoff = stacked > 0.5
at_or_below_cutoff = stacked <= 0.5
stacked[above_cutoff] = 1
stacked[at_or_below_cutoff] = 0
predictions = stacked

#print(predictions)
#accuracy = sum(predictions == titanic['Survived']) / len(predictions)
#print(accuracy)
#print(len(predictions))

# Load the Kaggle test set and give it the same preprocessing as the training
# frame.
titanic_1 = pd.read_csv('/home/zengxl/Desktop/titanic/test.csv')

# Impute missing ages with the training-set median so that train and test rows
# are prepared with the same statistic. titanic['Age'] has already had its gaps
# filled with that median, which leaves the median itself unchanged.
titanic_1['Age'] = titanic_1['Age'].fillna(titanic['Age'].median())

# Same integer encoding as the training frame: male -> 0, female -> 1.
titanic_1['Sex'] = titanic_1['Sex'].map({'male': 0, 'female': 1})

# Missing ports default to 'S'; then S -> 0, C -> 1, Q -> 2.
titanic_1['Embarked'] = titanic_1['Embarked'].fillna('S').map({'S': 0, 'C': 1, 'Q': 2})

# After the cross-validation loop `alg` only holds the model fitted on the last
# fold's training subset, so refit on every training row before predicting.
alg.fit(titanic[predictors], titanic['Survived'])

# Raw regression scores for the test rows; no 0/1 thresholding is applied here.
test_1 = alg.predict(titanic_1[predictors])

# Kept as a one-element list holding the prediction array, as before.
predictions_1 = [test_1]
                                                                                                                       

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值