【Kaggle笔记】New York City Taxi Trip Duration

比赛题目


代码

# -*- coding: utf-8 -*-

import pandas as pd

# Load the competition data; both files are expected in the working directory
# and both carry their column names in the first row.
train, test = (
    pd.read_csv(csv_name, header=0) for csv_name in ("train.csv", "test.csv")
)

# Inspect the available columns (dropoff_datetime and id can be dropped):
# print(train.columns)
# print(test.columns)

# Check whether any values are missing:
# print(train.info())
# print(test.info())

# Split the raw frames into model inputs and the regression target.
# The target is the trip duration; the id column is removed from the inputs,
# and the training inputs additionally lose dropoff_datetime and the target.
y_train = train['trip_duration']
X_train = train.drop(columns=['dropoff_datetime', 'trip_duration', 'id'])
X_test = test.drop(columns=['id'])

# Quick sanity checks:
# print(X_train.shape)
# print(X_test.shape)
# print(y_train.head())

# Feature engineering
def _prepare_features(frame):
    """Return a model-ready copy of *frame*.

    The pickup timestamp is expanded into three numeric columns
    (``month``, ``day`` = day of week, ``hour``) and then removed, and
    ``store_and_fwd_flag`` is encoded as 1 for 'Y' and 0 for 'N'.

    The input frame is not modified; a new DataFrame is returned with the
    derived columns appended after the remaining original columns.
    """
    # Parse the timestamp column once instead of once per derived feature.
    pickup = pd.DatetimeIndex(frame['pickup_datetime'])

    prepared = frame.drop(['pickup_datetime'], axis=1)
    prepared['month'] = pickup.month
    prepared['day'] = pickup.dayofweek
    prepared['hour'] = pickup.hour

    # Assign the encoded flag back explicitly. The earlier form,
    # frame['store_and_fwd_flag'].replace(..., inplace=True), is a chained
    # in-place call: with pandas Copy-on-Write it only updates a temporary
    # Series, leaving 'Y'/'N' strings in the frame.
    # Values other than 'Y'/'N' become NaN under Series.map.
    prepared['store_and_fwd_flag'] = prepared['store_and_fwd_flag'].map(
        {'Y': 1, 'N': 0}
    )
    return prepared


# Apply the identical transformation to both sets so their columns line up.
X_train = _prepare_features(X_train)
X_test = _prepare_features(X_test)
# print(X_test['store_and_fwd_flag'].value_counts())
# print(X_train.head())
# print(X_test.head())
# print(X_train.shape)
# print(X_test.shape)

# Regress trip duration with a random forest using the library defaults.
# NOTE: no random_state is passed, so predictions can differ between runs.
from sklearn.ensemble import RandomForestRegressor
rfr = RandomForestRegressor().fit(X_train, y_train)  # fit() returns the estimator
rfr_y_predict = rfr.predict(X_test)


# Write the submission file: one row per test id with its predicted duration.
# (The variable keeps its existing "gbr" name although the model above is a
# random forest.)
gbr_submission = test[['id']].assign(trip_duration=rfr_y_predict)
gbr_submission.to_csv('rfr_submission.csv', index=False)
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值