DCIC Task3

Table of Contents

EDA

导入常用库

import os, codecs
import pandas as pd
import numpy as np

%pylab inline 
'''
% 表示这行代码是魔法命令magic
inline 表示将图表内嵌到Nb中
pylab 表示matplotlib与Ipython联合开发的使用matlab的套件
'''
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg') # 使用matplotlib 批量显示svg图片

from matplotlib import font_manager as fm, rcParams
import matplotlib.pyplot as plt
Populating the interactive namespace from numpy and matplotlib
# Read the five track CSV files and stack them into one frame.
PATH = 'G:/Data Minning/DCIC2021/'
track_files = (
    'gxdc_gj20201221.csv',
    'gxdc_gj20201222.csv',
    'gxdc_gj20201223.csv',
    'gxdc_gj20201224.csv',
    'gxdc_gj20201225.csv',
)
bike_track = pd.concat([pd.read_csv(PATH + file_name) for file_name in track_files])

# Order the records by bicycle ID, then by locating time.
bike_track = bike_track.sort_values(by=['BICYCLE_ID', 'LOCATING_TIME'])

数据概览

本次数据共包含 103085 辆单车,累计 11920885 条轨迹记录。

# Number of distinct bicycle IDs present in the track data.
bike_track['BICYCLE_ID'].nunique()
103085
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
bike_track.describe()
LATITUDELONGITUDEdayhour
count1.192088e+071.192088e+071.192088e+071.192088e+07
mean2.450373e+011.181120e+022.305376e+017.742647e+00
std3.822377e-025.177484e-021.475251e+009.090383e-01
min2.442406e+011.179086e+022.100000e+016.000000e+00
25%2.447858e+011.180859e+022.200000e+017.000000e+00
50%2.449319e+011.181127e+022.300000e+018.000000e+00
75%2.452017e+011.181468e+022.400000e+018.000000e+00
max2.610928e+011.193015e+022.500000e+019.000000e+00
# Last five rows of the frame sorted by BICYCLE_ID and LOCATING_TIME.
bike_track.tail()
BICYCLE_IDLOCATING_TIMELATITUDELONGITUDEsourceLOCATING_TIME1datedaytimehour
121781ffff8731363147633f83d75bce9cdd662020-12-25 09:20:2324.484973118.175061CSXZGLZFJ2020-12-25 09:20:232020-12-252509:20:239
121782ffff8731363147633f83d75bce9cdd662020-12-25 09:20:3824.485223118.175275CSXZGLZFJ2020-12-25 09:20:382020-12-252509:20:389
121783ffff8731363147633f83d75bce9cdd662020-12-25 09:20:5324.485631118.175595CSXZGLZFJ2020-12-25 09:20:532020-12-252509:20:539
121784ffff8731363147633f83d75bce9cdd662020-12-25 09:21:0824.486110118.175923CSXZGLZFJ2020-12-25 09:21:082020-12-252509:21:089
121785ffff8731363147633f83d75bce9cdd662020-12-25 09:21:2324.486341118.176282CSXZGLZFJ2020-12-25 09:21:232020-12-252509:21:239

地图可视化

import folium

# Base map; `location` is given as [latitude, longitude], the same order as
# the columns selected for the polyline below.
m = folium.Map(location=[24.482426, 118.157606], zoom_start=12)

# Trajectory of one bicycle, as an array of (LATITUDE, LONGITUDE) rows.
track_points = bike_track.loc[
    bike_track['BICYCLE_ID'] == 'ffff8731363147633f83d75bce9cdd66',
    ['LATITUDE', 'LONGITUDE'],
].values
my_PolyLine = folium.PolyLine(locations=track_points, weight=5)

# `add_children` is a deprecated alias of `add_child` that newer folium/branca
# releases no longer provide. `add_child` attaches the layer and returns the
# map itself, so the notebook still renders the map as the cell result.
m.add_child(my_PolyLine)

在这里插入图片描述

对停车点的识别与聚合

思路:

def bike_fence_format(s):
    """Parse a bracketed, comma-separated coordinate string into a float array.

    All '[' and ']' characters are removed, the remainder is split on commas,
    the pieces are converted to float, and the result is reshaped to 5 rows
    (the column count is inferred from the number of values).
    """
    tokens = s.translate(str.maketrans('', '', '[]')).split(',')
    values = np.array(tokens).astype(float)
    return values.reshape(5, -1)

# Shared-bike parking spots (electronic fences). FENCE_LOC is converted from
# its string form into a coordinate array by bike_fence_format.
bike_fence = pd.read_csv(PATH + 'gxdc_tcd.csv')
bike_fence['FENCE_LOC'] = bike_fence['FENCE_LOC'].apply(bike_fence_format)

# Shared-bike order records, ordered by bicycle ID and then by update time.
bike_order = pd.read_csv(PATH + 'gxdc_dd.csv')
bike_order = bike_order.sort_values(['BICYCLE_ID', 'UPDATE_TIME'])

import geohash
# NOTE: bike_order['geohash'] is deliberately not computed here. The
# precision-6 encoding further down in this file is the one every later step
# uses, so a row-wise precision-9 pass at this point would only be overwritten
# before anything reads it.
from geopy.distance import geodesic

# FENCE_LOC arrays are indexed as column 0 = longitude, column 1 = latitude.
fence_polygons = bike_fence['FENCE_LOC']

# Bounding box of each fence.
bike_fence['MIN_LATITUDE'] = fence_polygons.map(lambda pts: pts[:, 1].min())
bike_fence['MAX_LATITUDE'] = fence_polygons.map(lambda pts: pts[:, 1].max())

bike_fence['MIN_LONGITUDE'] = fence_polygons.map(lambda pts: pts[:, 0].min())
bike_fence['MAX_LONGITUDE'] = fence_polygons.map(lambda pts: pts[:, 0].max())


def _bbox_diagonal_m(row):
    """Return the geodesic length in meters of one fence's bounding-box diagonal."""
    corner_min = (row['MIN_LATITUDE'], row['MIN_LONGITUDE'])
    corner_max = (row['MAX_LATITUDE'], row['MAX_LONGITUDE'])
    return geodesic(corner_min, corner_max).meters


# Despite the column name, this stores a length (the bounding-box diagonal in
# meters), not an area.
bike_fence['FENCE_AREA'] = bike_fence.apply(_bbox_diagonal_m, axis=1)

# Center = mean of all vertices except the last one, with each pair flipped to
# (latitude, longitude). Presumably the last vertex repeats the first to close
# the polygon - TODO confirm against the fence data.
bike_fence['FENCE_CENTER'] = fence_polygons.map(
    lambda pts: pts[:-1, ::-1].mean(axis=0)
)
import geohash

# Assign every order location to its precision-6 geohash cell.
bike_order['geohash'] = [
    geohash.encode(lat, lon, precision=6)
    for lat, lon in zip(bike_order['LATITUDE'], bike_order['LONGITUDE'])
]

# Same encoding for each fence, using its (latitude, longitude) center.
bike_fence['geohash'] = bike_fence['FENCE_CENTER'].map(
    lambda center: geohash.encode(center[0], center[1], precision=6)
)
# bike_order
# Quick check of the encoding: two points with the same latitude whose
# longitudes differ by 0.00006 degrees, both encoded at precision 6. The
# trailing comma builds a tuple so the notebook displays both hashes.
geohash.encode(24.521156, 118.140385, precision=6), \
geohash.encode(24.521156, 118.140325, precision=6)
('wsk52r', 'wsk52r')
# Parse the order timestamp once and derive text keys from it.
bike_order['UPDATE_TIME'] = pd.to_datetime(bike_order['UPDATE_TIME'])
update_time = bike_order['UPDATE_TIME'].dt

# Day of month as text (no zero padding).
bike_order['DAY'] = update_time.day.astype(str)

# Hour as text, left-padded with '0' to two characters.
bike_order['HOUR'] = update_time.hour.astype(str).str.rjust(2, fillchar='0')

# Combined slot key: day text followed by the two-character hour.
bike_order['DAY_HOUR'] = bike_order['DAY'] + bike_order['HOUR']
# Shared pivot layout: one row per geohash cell, one column per DAY_HOUR slot,
# each value the count of matching order records (0 where there are none).
flow_layout = dict(
    values='LOCK_STATUS',
    index=['geohash'],
    columns=['DAY_HOUR'],
    aggfunc='count',
    fill_value=0,
)

# Records with LOCK_STATUS == 1 are counted as inflow, LOCK_STATUS == 0 as outflow.
status_is_one = bike_order['LOCK_STATUS'] == 1
status_is_zero = bike_order['LOCK_STATUS'] == 0

bike_inflow = pd.pivot_table(bike_order[status_is_one], **flow_layout)
bike_outflow = pd.pivot_table(bike_order[status_is_zero], **flow_layout)
# Plot inflow and outflow per DAY_HOUR slot for one geohash cell on shared axes.
cell_inflow = bike_inflow.loc['wsk596']
cell_outflow = bike_outflow.loc['wsk596']
cell_inflow.plot()
cell_outflow.plot()

# One tick per inflow column, labelled with the DAY_HOUR key.
tick_positions = list(range(len(bike_inflow.columns)))
plt.xticks(tick_positions, bike_inflow.columns, rotation=40)
plt.legend(['Inflow', 'OutFlow'])

在这里插入图片描述

潮汐状态可视化

# Rebuild the DAY / HOUR / DAY_HOUR text keys from the order timestamp.
order_time = pd.to_datetime(bike_order['UPDATE_TIME'])
bike_order['UPDATE_TIME'] = order_time

day_text = order_time.dt.day.astype(str)
bike_order['DAY'] = day_text

# Hours are left-padded with '0' to a width of two characters.
hour_text = order_time.dt.hour.astype(str).str.rjust(2, fillchar='0')
bike_order['HOUR'] = hour_text

bike_order['DAY_HOUR'] = bike_order['DAY'] + bike_order['HOUR']
import folium
from folium import plugins
from folium.plugins import HeatMap

map_hooray = folium.Map(location=[24.482426, 118.157606], zoom_start=14)

# Heat layer: LATITUDE/LONGITUDE of the orders in slot '2106' whose
# LOCK_STATUS is 1.
in_slot = bike_order['DAY_HOUR'] == '2106'
status_is_one = bike_order['LOCK_STATUS'] == 1
heat_points = bike_order.loc[in_slot & status_is_one, ['LATITUDE', 'LONGITUDE']]
HeatMap(heat_points).add_to(map_hooray)

# Drop a marker on every 10th fence, at its first vertex with the coordinate
# pair reversed (FENCE_LOC columns are read elsewhere in this file as
# longitude first, latitude second).
for fence_vertices in bike_fence['FENCE_LOC'].values[::10]:
    marker_location = list(fence_vertices[0, ::-1])
    folium.Marker(marker_location).add_to(map_hooray)

# Last expression of the cell, so the notebook renders the map.
map_hooray

在这里插入图片描述

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值