#读取taxiod订单表
#删除练习
import pandas as pd
# Load the taxi OD (origin-destination) order table. The path must include
# the ``.csv`` suffix.
# header=None makes pandas treat every line as data, so the file's own header
# line arrives as row 0 of the frame.
# low_memory=False parses each column in a single pass; the default chunked
# parsing left str and numeric values mixed inside one column and raised
# DtypeWarning.
taxiod = pd.read_csv(
    r'data-sample/TaxiOD.csv',
    header=None,
    low_memory=False,
)
taxiod.columns = ['VehicleNum', 'Stime', 'SLng', 'SLat', 'ELng', 'ELat', 'Etime']
taxiod
C:\Program Files (x86)\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3146: DtypeWarning: Columns (0,2,3,4,5) have mixed types.Specify dtype option on import or set low_memory=False.
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
VehicleNum | Stime | SLng | SLat | ELng | ELat | Etime | |
---|---|---|---|---|---|---|---|
0 | VehicleNum | Stime | SLng | SLat | ELng | ELat | Etime |
1 | 22223 | 00:03:23 | 114.16746499999999 | 22.562468 | 114.22523500000001 | 22.55275 | 00:10:48 |
2 | 22223 | 00:11:33 | 114.22715 | 22.554167 | 114.22921799999999 | 22.560217 | 00:15:19 |
3 | 22223 | 00:17:13 | 114.23135400000001 | 22.562166 | 114.255798 | 22.590967000000003 | 00:29:06 |
4 | 22223 | 00:36:45 | 114.240196 | 22.56365 | 114.119965 | 22.566668 | 00:54:42 |
... | ... | ... | ... | ... | ... | ... | ... |
464714 | 36947 | 22:39:12 | 114.006 | 22.5481 | 113.996 | 22.5371 | 22:46:25 |
464715 | 36947 | 22:49:38 | 113.995 | 22.535 | 113.922 | 22.4965 | 23:13:15 |
464716 | 36947 | 23:24:24 | 113.921 | 22.5135 | 113.93 | 22.4942 | 23:30:32 |
464717 | 36947 | 23:37:09 | 113.928 | 22.5126 | 113.911 | 22.4879 | 23:49:10 |
464718 | 36947 | 23:52:18 | 113.91 | 22.4876 | NaN | NaN | NaN |
464719 rows × 7 columns
# Discard the row labelled 0 (the header text that was read in as data), then
# renumber the remaining rows starting from 0.
taxiod = taxiod.drop(index=0).reset_index(drop=True)
taxiod
VehicleNum | Stime | SLng | SLat | ELng | ELat | Etime | |
---|---|---|---|---|---|---|---|
0 | 22223 | 00:03:23 | 114.16746499999999 | 22.562468 | 114.22523500000001 | 22.55275 | 00:10:48 |
1 | 22223 | 00:11:33 | 114.22715 | 22.554167 | 114.22921799999999 | 22.560217 | 00:15:19 |
2 | 22223 | 00:17:13 | 114.23135400000001 | 22.562166 | 114.255798 | 22.590967000000003 | 00:29:06 |
3 | 22223 | 00:36:45 | 114.240196 | 22.56365 | 114.119965 | 22.566668 | 00:54:42 |
4 | 22223 | 01:01:14 | 114.13541399999998 | 22.575933 | 114.166748 | 22.608267 | 01:08:17 |
... | ... | ... | ... | ... | ... | ... | ... |
464713 | 36947 | 22:39:12 | 114.006 | 22.5481 | 113.996 | 22.5371 | 22:46:25 |
464714 | 36947 | 22:49:38 | 113.995 | 22.535 | 113.922 | 22.4965 | 23:13:15 |
464715 | 36947 | 23:24:24 | 113.921 | 22.5135 | 113.93 | 22.4942 | 23:30:32 |
464716 | 36947 | 23:37:09 | 113.928 | 22.5126 | 113.911 | 22.4879 | 23:49:10 |
464717 | 36947 | 23:52:18 | 113.91 | 22.4876 | NaN | NaN | NaN |
464718 rows × 7 columns
# Keep only the rows that have a drop-off longitude; this removes records
# whose end point is missing, such as the final row of the table.
# ``notna()`` replaces the unary minus on a boolean mask, and ``.copy()``
# makes the result an independent frame so that the column assignments made
# afterwards do not raise SettingWithCopyWarning.
taxiod = taxiod[taxiod['ELng'].notna()].copy()
# Parse the pick-up time strings into timestamps.
tmp = pd.to_datetime(taxiod['Stime'])
tmp
0 2021-03-03 00:03:23
1 2021-03-03 00:11:33
2 2021-03-03 00:17:13
3 2021-03-03 00:36:45
4 2021-03-03 01:01:14
...
464712 2021-03-03 22:08:22
464713 2021-03-03 22:39:12
464714 2021-03-03 22:49:38
464715 2021-03-03 23:24:24
464716 2021-03-03 23:37:09
Name: Stime, Length: 464717, dtype: datetime64[ns]
# Parse the drop-off time strings into timestamps.
tmp1 = taxiod['Etime'].pipe(pd.to_datetime)
tmp1
0 2021-03-03 00:10:48
1 2021-03-03 00:15:19
2 2021-03-03 00:29:06
3 2021-03-03 00:54:42
4 2021-03-03 01:08:17
...
464712 2021-03-03 22:36:53
464713 2021-03-03 22:46:25
464714 2021-03-03 23:13:15
464715 2021-03-03 23:30:32
464716 2021-03-03 23:49:10
Name: Etime, Length: 464717, dtype: datetime64[ns]
# Trip duration: drop-off timestamp minus pick-up timestamp, row by row.
Duration = tmp1.sub(tmp)
# Store the durations on the order table as a new column and show the result.
taxiod['Duration'] = Duration
taxiod
<ipython-input-10-8b258a85ed6d>:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
taxiod['Duration']=Duration
VehicleNum | Stime | SLng | SLat | ELng | ELat | Etime | Duration | |
---|---|---|---|---|---|---|---|---|
0 | 22223 | 00:03:23 | 114.16746499999999 | 22.562468 | 114.22523500000001 | 22.55275 | 00:10:48 | 0 days 00:07:25 |
1 | 22223 | 00:11:33 | 114.22715 | 22.554167 | 114.22921799999999 | 22.560217 | 00:15:19 | 0 days 00:03:46 |
2 | 22223 | 00:17:13 | 114.23135400000001 | 22.562166 | 114.255798 | 22.590967000000003 | 00:29:06 | 0 days 00:11:53 |
3 | 22223 | 00:36:45 | 114.240196 | 22.56365 | 114.119965 | 22.566668 | 00:54:42 | 0 days 00:17:57 |
4 | 22223 | 01:01:14 | 114.13541399999998 | 22.575933 | 114.166748 | 22.608267 | 01:08:17 | 0 days 00:07:03 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
464712 | 36947 | 22:08:22 | 113.914 | 22.5314 | 113.997 | 22.5456 | 22:36:53 | 0 days 00:28:31 |
464713 | 36947 | 22:39:12 | 114.006 | 22.5481 | 113.996 | 22.5371 | 22:46:25 | 0 days 00:07:13 |
464714 | 36947 | 22:49:38 | 113.995 | 22.535 | 113.922 | 22.4965 | 23:13:15 | 0 days 00:23:37 |
464715 | 36947 | 23:24:24 | 113.921 | 22.5135 | 113.93 | 22.4942 | 23:30:32 | 0 days 00:06:08 |
464716 | 36947 | 23:37:09 | 113.928 | 22.5126 | 113.911 | 22.4879 | 23:49:10 | 0 days 00:12:01 |
464717 rows × 8 columns
# Column-renaming practice. Rebinding the returned frame instead of passing
# inplace=True avoids mutating a frame that pandas may regard as a copy of a
# slice (the source of SettingWithCopyWarning). rename() ignores keys that are
# not present, so this is a no-op when the column is already 'Duration'.
taxiod = taxiod.rename(columns={'duration': 'Duration'})
C:\Program Files (x86)\Anaconda3\lib\site-packages\pandas\core\frame.py:4296: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
return super().rename(
# Display the frame to check the column names.
taxiod
VehicleNum | Stime | SLng | SLat | ELng | ELat | Etime | Duration | |
---|---|---|---|---|---|---|---|---|
0 | 22223 | 00:03:23 | 114.16746499999999 | 22.562468 | 114.22523500000001 | 22.55275 | 00:10:48 | 0 days 00:07:25 |
1 | 22223 | 00:11:33 | 114.22715 | 22.554167 | 114.22921799999999 | 22.560217 | 00:15:19 | 0 days 00:03:46 |
2 | 22223 | 00:17:13 | 114.23135400000001 | 22.562166 | 114.255798 | 22.590967000000003 | 00:29:06 | 0 days 00:11:53 |
3 | 22223 | 00:36:45 | 114.240196 | 22.56365 | 114.119965 | 22.566668 | 00:54:42 | 0 days 00:17:57 |
4 | 22223 | 01:01:14 | 114.13541399999998 | 22.575933 | 114.166748 | 22.608267 | 01:08:17 | 0 days 00:07:03 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
464712 | 36947 | 22:08:22 | 113.914 | 22.5314 | 113.997 | 22.5456 | 22:36:53 | 0 days 00:28:31 |
464713 | 36947 | 22:39:12 | 114.006 | 22.5481 | 113.996 | 22.5371 | 22:46:25 | 0 days 00:07:13 |
464714 | 36947 | 22:49:38 | 113.995 | 22.535 | 113.922 | 22.4965 | 23:13:15 | 0 days 00:23:37 |
464715 | 36947 | 23:24:24 | 113.921 | 22.5135 | 113.93 | 22.4942 | 23:30:32 | 0 days 00:06:08 |
464716 | 36947 | 23:37:09 | 113.928 | 22.5126 | 113.911 | 22.4879 | 23:49:10 | 0 days 00:12:01 |
464717 rows × 8 columns
# Sample a single Timedelta value for inspection.
r = taxiod['Duration'].iloc[0]
# Trip length in seconds. ``.dt.seconds`` is the vectorized form of reading
# ``.seconds`` on every Timedelta: it yields the seconds component of each
# value (whole days are not included) without a Python-level call per row.
taxiod['order_time'] = taxiod['Duration'].dt.seconds
<ipython-input-13-d23b5d7f6867>:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
taxiod['order_time']=taxiod['Duration'].apply(lambda r:r.seconds)
# Column-deletion practice: drop() returns a new frame without 'Duration'.
# The result is only displayed, not assigned, so `taxiod` keeps the column.
taxiod.drop('Duration', axis=1)
VehicleNum | Stime | SLng | SLat | ELng | ELat | Etime | order_time | |
---|---|---|---|---|---|---|---|---|
0 | 22223 | 00:03:23 | 114.16746499999999 | 22.562468 | 114.22523500000001 | 22.55275 | 00:10:48 | 445 |
1 | 22223 | 00:11:33 | 114.22715 | 22.554167 | 114.22921799999999 | 22.560217 | 00:15:19 | 226 |
2 | 22223 | 00:17:13 | 114.23135400000001 | 22.562166 | 114.255798 | 22.590967000000003 | 00:29:06 | 713 |
3 | 22223 | 00:36:45 | 114.240196 | 22.56365 | 114.119965 | 22.566668 | 00:54:42 | 1077 |
4 | 22223 | 01:01:14 | 114.13541399999998 | 22.575933 | 114.166748 | 22.608267 | 01:08:17 | 423 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
464712 | 36947 | 22:08:22 | 113.914 | 22.5314 | 113.997 | 22.5456 | 22:36:53 | 1711 |
464713 | 36947 | 22:39:12 | 114.006 | 22.5481 | 113.996 | 22.5371 | 22:46:25 | 433 |
464714 | 36947 | 22:49:38 | 113.995 | 22.535 | 113.922 | 22.4965 | 23:13:15 | 1417 |
464715 | 36947 | 23:24:24 | 113.921 | 22.5135 | 113.93 | 22.4942 | 23:30:32 | 368 |
464716 | 36947 | 23:37:09 | 113.928 | 22.5126 | 113.911 | 22.4879 | 23:49:10 | 721 |
464717 rows × 8 columns
# Hour of the pick-up time: the text before the first ':' in 'HH:MM:SS'.
# The vectorized .str accessor replaces a per-row Python lambda and passes
# missing values through instead of raising on them.
taxiod['hour'] = taxiod['Stime'].str.split(':').str[0]
taxiod
<ipython-input-15-c7c6b55b9ff2>:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
taxiod['hour']=taxiod['Stime'].apply(lambda r:r.split(':')[0])
VehicleNum | Stime | SLng | SLat | ELng | ELat | Etime | Duration | order_time | hour | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 22223 | 00:03:23 | 114.16746499999999 | 22.562468 | 114.22523500000001 | 22.55275 | 00:10:48 | 0 days 00:07:25 | 445 | 00 |
1 | 22223 | 00:11:33 | 114.22715 | 22.554167 | 114.22921799999999 | 22.560217 | 00:15:19 | 0 days 00:03:46 | 226 | 00 |
2 | 22223 | 00:17:13 | 114.23135400000001 | 22.562166 | 114.255798 | 22.590967000000003 | 00:29:06 | 0 days 00:11:53 | 713 | 00 |
3 | 22223 | 00:36:45 | 114.240196 | 22.56365 | 114.119965 | 22.566668 | 00:54:42 | 0 days 00:17:57 | 1077 | 00 |
4 | 22223 | 01:01:14 | 114.13541399999998 | 22.575933 | 114.166748 | 22.608267 | 01:08:17 | 0 days 00:07:03 | 423 | 01 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
464712 | 36947 | 22:08:22 | 113.914 | 22.5314 | 113.997 | 22.5456 | 22:36:53 | 0 days 00:28:31 | 1711 | 22 |
464713 | 36947 | 22:39:12 | 114.006 | 22.5481 | 113.996 | 22.5371 | 22:46:25 | 0 days 00:07:13 | 433 | 22 |
464714 | 36947 | 22:49:38 | 113.995 | 22.535 | 113.922 | 22.4965 | 23:13:15 | 0 days 00:23:37 | 1417 | 22 |
464715 | 36947 | 23:24:24 | 113.921 | 22.5135 | 113.93 | 22.4942 | 23:30:32 | 0 days 00:06:08 | 368 | 23 |
464716 | 36947 | 23:37:09 | 113.928 | 22.5126 | 113.911 | 22.4879 | 23:49:10 | 0 days 00:12:01 | 721 | 23 |
464717 rows × 10 columns
import matplotlib.pyplot as plt

# Single box plot of trip duration over all orders.
fig = plt.figure(1, (7, 3), dpi=250)
ax = plt.subplot(111)  # also becomes the current axes

# order_time is stored in seconds; plot it in minutes.
ax.boxplot(taxiod['order_time'] / 60)
ax.set_ylabel('minutes')
ax.set_xlabel('order time')
# Limit the y-axis to the first hour so the box is not squashed by outliers.
ax.set_ylim(0, 60)
plt.show()
import matplotlib.pyplot as plt
fig = plt.figure(1, (10, 5), dpi=250)
ax = plt.subplot(111)
plt.sca(ax)

# Prepare the data: one Series of trip durations (in minutes) per start hour.
# `hour` holds the distinct hour labels in ascending order.
hour = taxiod['hour'].drop_duplicates().sort_values()
datas = [
    taxiod.loc[taxiod['hour'] == h, 'order_time'] / 60
    for h in hour
]

# Draw one box per hour.
plt.boxplot(datas)

# boxplot places the boxes at x = 1..len(datas); relabel those ticks with the
# hour values.
plt.xticks(range(1, len(hour) + 1), list(hour))

plt.ylabel('Order time(minutes)')
plt.xlabel('Order start time')
# Limit the y-axis to the first hour so the boxes are not squashed by outliers.
plt.ylim(0, 60)
plt.show()
import seaborn as sns
fig = plt.figure(1, (10, 5), dpi=250)
ax = plt.subplot(111)
plt.sca(ax)

# seaborn does the per-hour grouping itself, so one call draws every box.
# `order` is passed explicitly: for string categories seaborn otherwise uses
# the order in which the values first appear in the data, which is only
# ascending if the first rows happen to cover the hours in sequence.
sns.boxplot(
    x='hour',
    y=taxiod['order_time'] / 60,  # seconds -> minutes
    data=taxiod,
    order=sorted(taxiod['hour'].dropna().unique()),
    ax=ax,
)
plt.ylabel('order_time(minutes)')
plt.xlabel('order start time')
# Limit the y-axis to the first hour so the boxes are not squashed by outliers.
plt.ylim(0, 60)
plt.show()