机器学习-智慧交通-lgb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# Data exploration: load the road-link attribute table and inspect it.
# Raw strings keep the Windows backslashes literal; in a normal string
# sequences such as `\p` or `\g` are invalid escapes (SyntaxWarning on 3.12+).
data = pd.read_csv(
    r'E:\python_projiect\grade3_semester2\ML\dataset\智慧交通+天文数据挖掘数据集\it\data\gy_contest_link_info.txt',
    delimiter=';')
print(data.head(5))
# DataFrame.info() prints its report itself and returns None, so it is not
# wrapped in print().
data.info()
print(data.describe())

# Side-by-side histograms of the `length` and `width` columns.
f, ax = plt.subplots(1, 2, figsize=(5, 5))
data.length.plot.hist(ax=ax[0], bins=20, edgecolor='black', color='#1c6cab')
ax[0].set_title('link length distribution')
data.width.plot.hist(ax=ax[1], color='#fc452b', bins=20, edgecolor='black')
ax[1].set_title('link width distribution')

traveltime_data = pd.read_csv(
    r'E:\python_projiect\grade3_semester2\ML\dataset\智慧交通+天文数据挖掘数据集\it\data\quaterfinal_gy_cmp_training_traveltime.txt',
    delimiter=';')

# Characters 1..19 of `time_interval` are parsed as the interval start
# (presumably the value looks like "[YYYY-MM-DD HH:MM:SS,...)" -- TODO confirm).
# Missing values stay missing, as with the previous na_action='ignore'.
traveltime_data['time_interval_begin'] = pd.to_datetime(
    traveltime_data['time_interval'].str[1:20])
traveltime_data['hour'] = traveltime_data['time_interval_begin'].dt.hour
# pandas weekday is 0 = Monday, so week_day runs 1 = Monday .. 7 = Sunday.
traveltime_data['week_day'] = traveltime_data['time_interval_begin'].dt.weekday + 1
traveltime_data['month'] = traveltime_data['time_interval_begin'].dt.month
traveltime_data['year'] = traveltime_data['time_interval_begin'].dt.year
# Regression target in log1p space; predictions are mapped back with expm1.
traveltime_data['travel_time_log1p'] = np.log1p(traveltime_data['travel_time'])
# Printed explicitly: a bare expression is discarded when run as a script.
print(traveltime_data.describe())

# Time-based split of the 2017 rows: April for training, May for validation.
# .copy() gives independent frames, so columns added to them later do not
# write into a filtered slice of `traveltime_data`.
new_traveltime_data = traveltime_data[traveltime_data['year'] == 2017]
train = new_traveltime_data[new_traveltime_data['month'] == 4].copy()
val = new_traveltime_data[new_traveltime_data['month'] == 5].copy()

# Dates (as 'YYYY-MM-DD' strings) flagged as holidays by is_holiday().
# NOTE(review): presumably the 2017 public-holiday breaks falling in the
# April/May data -- confirm against the official calendar.
holiday = [
    '2017-04-02', '2017-04-03', '2017-04-04',
    '2017-04-29', '2017-04-30', '2017-05-01',
    '2017-05-28', '2017-05-29', '2017-05-30',
]


def is_holiday(data):
    """Return 1 if *data* is one of the entries in ``holiday``, else 0.

    *data* is compared against the module-level ``holiday`` list, whose
    entries are 'YYYY-MM-DD' strings.
    """
    return int(data in holiday)


def is_weekday(data):
    """Return 1 when *data* equals 6 or 7, otherwise 0.

    NOTE: despite the name, this flags the weekend. The ``week_day`` column
    it is applied to is built as ``weekday() + 1`` (1 = Monday), so 6 and 7
    are Saturday and Sunday.
    """
    return 1 if data in (6, 7) else 0


def is_weekday_holiday(x, y):
    """Return 1 only when both flags *x* and *y* equal 1, otherwise 0."""
    both_set = x == 1 and y == 1
    return int(both_set)


def basic_feature(data):
    """Add the binary calendar flag columns to *data* and return it.

    The frame is modified in place: ``is_holiday`` is derived from the
    ``date`` column and ``is_weekday`` from the ``week_day`` column, each by
    applying the function of the same name value by value.
    """
    # (new column, source column, per-value flag function)
    flag_specs = (
        ('is_holiday', 'date', is_holiday),
        ('is_weekday', 'week_day', is_weekday),
    )
    for flag_col, source_col, flag_fn in flag_specs:
        data[flag_col] = data[source_col].apply(flag_fn)

    return data


# Add the holiday / weekend flags to the training split, then the validation
# split. basic_feature() modifies its argument in place and returns it.
train_1, val_1 = basic_feature(train), basic_feature(val)

import lightgbm as lgb


def mape_ln(y, d):
    """Custom evaluation metric: MAPE computed after undoing the log1p transform.

    Args:
        y: predictions in log1p space.
        d: object whose ``get_label()`` returns the log1p-space labels.

    Returns:
        A ``('mape', value, False)`` tuple; the trailing ``False`` is the
        "higher is better" flag, i.e. lower values are better.
    """
    labels = d.get_label()
    actual = np.abs(np.expm1(labels))
    abs_error = np.abs(np.expm1(y) - actual)
    score = np.sum(abs_error / actual) / len(labels)
    return 'mape', score, False


# LightGBM training parameters used by lgb_train().
lgb_params = {
    'learning_rate': 0.02,
    # The key must be spelled with an underscore; 'boosting type' (with a
    # space) is not a LightGBM parameter name and would not be applied.
    'boosting_type': 'gbdt',
    # L1 (absolute error) regression objective.
    'objective': 'regression_l1',
    'num_leaves': 50,
    # Sample 80% of the features per tree, and 80% of the rows every 5 iterations.
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': -1
}


def lgb_train(tmp, tmp_1):
    """Train LightGBM on the hour-8 rows of *tmp* and score the hour-8 rows of *tmp_1*.

    Args:
        tmp: training DataFrame; must contain ``hour`` and
            ``travel_time_log1p`` plus the feature columns.
        tmp_1: validation DataFrame with the same columns and ``travel_time``.

    Returns:
        A copy of the ``hour == 8`` rows of *tmp_1* with two added columns:
        ``pred`` (prediction mapped back with expm1) and ``mean`` (absolute
        percentage error of ``pred`` against ``travel_time``).

    Uses the module-level ``lgb_params`` and the ``mape_ln`` metric.
    """
    # Identifier, raw-time and target columns are never used as features.
    FEATS_EXCLUDED = ['link_ID', 'date', 'time_interval', 'time_interval_begin', 'travel_time', 'travel_time_log1p',
                      'day', 'link_class', 'year']
    train_features = [c for c in tmp.columns if c not in FEATS_EXCLUDED]

    tmp_2 = tmp[tmp['hour'] == 8]
    # Copy: result columns are added below and must not write into a slice
    # of the caller's frame.
    tmp_3 = tmp_1[tmp_1['hour'] == 8].copy()

    train_feat1 = tmp_2[train_features]
    train_feat2 = tmp_3[train_features]

    lgb_train1 = lgb.Dataset(train_feat1, tmp_2['travel_time_log1p'])
    lgb_train2 = lgb.Dataset(train_feat2, tmp_3['travel_time_log1p'])

    # LightGBM 4.x removed the `verbose_eval` / `early_stopping_rounds`
    # keywords of lgb.train() in favour of callbacks; fall back to the old
    # keywords on releases that predate lgb.log_evaluation.
    if hasattr(lgb, 'log_evaluation'):
        stop_and_log = {'callbacks': [lgb.early_stopping(stopping_rounds=200),
                                      lgb.log_evaluation(period=200)]}
    else:
        stop_and_log = {'verbose_eval': 200, 'early_stopping_rounds': 200}

    gbm = lgb.train(lgb_params,
                    train_set=lgb_train1,
                    num_boost_round=10000,
                    feval=mape_ln,
                    valid_sets=[lgb_train2],
                    **stop_and_log)

    # The model is fit on log1p(travel_time); expm1 maps predictions back.
    tmp_3['pred'] = np.expm1(gbm.predict(train_feat2))
    tmp_3['mean'] = np.abs(tmp_3['pred'] - tmp_3['travel_time']) / tmp_3['travel_time']

    return tmp_3

result = lgb_train(train_1, val_1)
# Average of the per-row absolute percentage errors, i.e. the validation MAPE.
# Printed explicitly: a bare expression is discarded when run as a script.
print(result['mean'].mean())

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值