# --- Script 1: OD splitting (OD拆分) ---
import csv
import pandas as pd
# Load the raw CSV into memory as a list of rows (each row is a list of str).
# The context manager closes the handle even if parsing fails; newline='' is
# what the csv module asks for so quoted embedded newlines are parsed correctly.
# NOTE(review): 'ansi' is presumably only usable on Windows (alias of the mbcs
# codec) - confirm the file's real encoding if this must run elsewhere.
with open(r"D:\大四上\数据课程设计\two_plate_data.csv", encoding='ansi', newline='') as csv_file:
    lines = list(csv.reader(csv_file))
# `lines` is a plain list, which has no `.shape`; len() gives the row count
# (this count includes the header row).
print('\n一共{0}条数据。'.format(str(len(lines))))  # original run reported 46381313
header, values = lines[0], lines[1:]
# Transpose the rows into columns and key each column by its header name.
data_dict = {h: v for h, v in zip(header, zip(*values))}
df = pd.DataFrame(data_dict)  # convert to a DataFrame
# Sorting
# Every column arrives as str from csv.reader; cast the key columns so that
# ordering and the later subtraction are numeric.
df = df.astype({'plate_number': 'str', 'time': 'int', 'whether_operation': 'int'})
# sort_values returns a new frame, so the result has to be assigned back;
# calling it without assignment leaves `df` unsorted.
df = df.sort_values(by=['plate_number', 'time'])
# Convert to a list of rows
data = df.values.tolist()
# NOTE(review): positions 4 and 5 are presumably the plate_number and time
# columns - confirm against the CSV header. The list sort is stable, so it is
# harmless if the frame is already in this order.
data.sort(key=lambda x: (x[4], x[5]))
# OD splitting
# Keep row i whenever the next row has the same value at position 4 and the
# values at position 6 differ by exactly one (in either direction).
# NOTE(review): positions 4 and 6 are presumably plate_number and
# whether_operation - confirm against the CSV header.
n = len(df)
od = []
for i in range(n - 1):
    current, following = data[i], data[i + 1]
    if current[4] != following[4]:
        continue
    if current[6] - following[6] in (-1, 1):
        od.append(current)
# Pair O/D records and compute the trip duration
def timegap(t1, t2):
    """Return the signed number of seconds from ``t1`` to ``t2``.

    Each argument is a number whose decimal digits are read as HHMMSS: the
    last two digits are seconds, the two before them minutes, and everything
    above that hours. The result is negative when ``t2`` encodes an earlier
    time than ``t1``; no wrap-around across midnight is applied.
    """
    def to_seconds(stamp):
        # Integer divmod keeps the arithmetic exact; the float division plus
        # int() truncation it replaces can lose precision on large values.
        hours, remainder = divmod(stamp, 10000)
        minutes, seconds = divmod(remainder, 100)
        return hours * 3600 + minutes * 60 + seconds

    return to_seconds(t2) - to_seconds(t1)
# Build one record per trip from consecutive entries of `od`:
# [od[j][4], od[j][2], od[j][3], od[j][5], od[j+1][2], od[j+1][3], od[j+1][5], duration]
# where duration is timegap() between the two position-5 values.
od1 = []
m = len(od)
# Step through `od` two entries at a time. The stop value is m - 1 so that the
# final complete pair (m - 2, m - 1) is included when m is even; a stop of
# m - 2 silently drops that pair. A trailing unpaired entry is still skipped.
# NOTE(review): entries are paired purely by position; nothing here checks
# that both halves share the same value at position 4 - confirm that is intended.
for j in range(0, m - 1, 2):
    origin, destination = od[j], od[j + 1]
    od1.append([
        origin[4], origin[2], origin[3], origin[5],
        destination[2], destination[3], destination[5],
        timegap(origin[5], destination[5]),
    ])
# --- Script 2: grid (raster cell) processing (栅格处理) ---
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import datetime
start = datetime.datetime.now()  # time the data loading
# Context managers close each handle as soon as it has been parsed; plain
# open() calls that rebind `f` would leave both files open.
with open(r'D:/大四上/数据课程设计/3.csv', encoding='utf-8', errors='ignore') as f:
    unsettled_od = pd.read_csv(f, header=0)
# The zone table was exported from a shapefile grid.
with open(r'D:/大四上/数据课程设计/小区.csv', encoding='utf-8', errors='ignore') as f:
    grid = pd.read_csv(f, header=0)
# NOTE(review): an earlier, disabled read_csv call named the OD columns as
# plate_number, O_date, O_lat, O_lon, O_time, O, D_date, D_lat, D_lon, D_time,
# D, timegap - confirm against the header of 3.csv.
# Placeholder columns; they are overwritten further down in this script.
unsettled_od['is_out'] = '1'
unsettled_od['o_grid'] = '9999'
unsettled_od['d_grid'] = '9999'
end = datetime.datetime.now()
print('读数据耗时:', end - start)
# Quick look at the data. A bare `.head()` expression shows nothing when the
# file is run as a script, so print it explicitly.
print('原始od数据集:')
print(unsettled_od.head())
# In[] 编号计算
# Coordinates of the lower-left corner of the grid (origin of the numbering)
lng0, lat0 = 113.751943, 22.447837
# Size of one grid cell along each axis, in the same units as the coordinates
delta_lng, delta_lat = 0.019519038, 0.017985323
# Grid dimensions
column, row = 45, 24
# Bounding box used to filter points; the minimum corner has the same values
# as the grid origin above.
lat_min, lat_max = lat0, 22.864748
lng_min, lng_max = lng0, 114.624187
# Check whether both points lie inside the bounding box
def delete(lng1, lat1, lng2, lat2):
    """Return ``[1]`` if both points are strictly inside the box, else ``[0]``.

    The box is given by the module-level ``lat_min``/``lat_max`` and
    ``lng_min``/``lng_max``; a point lying exactly on an edge counts as
    outside. The flag is wrapped in a one-element list.
    """
    if not (lat_min < lat1 < lat_max and lat_min < lat2 < lat_max):
        return [0]
    if not (lng_min < lng1 < lng_max and lng_min < lng2 < lng_max):
        return [0]
    return [1]
# Flag every trip: delete() yields [1] when both endpoints are inside the
# bounding box and [0] otherwise, so despite the column name a value of 0
# marks an out-of-range trip. Building a plain list (instead of indexing
# column 0 of a temporary DataFrame) also works when there are no rows.
unsettled_od['is_out'] = [
    delete(o_lng, o_lat, d_lng, d_lat)[0]
    for o_lng, o_lat, d_lng, d_lat in zip(
        unsettled_od['O_lon'], unsettled_od['O_lat'],
        unsettled_od['D_lon'], unsettled_od['D_lat'],
    )
]
# Drop out-of-range trips. `.copy()` makes the result an independent frame so
# the column assignments that follow do not trigger SettingWithCopyWarning.
unsettled_od = unsettled_od[unsettled_od['is_out'] != 0].copy()
# Compute the grid-cell number of a point
def gridid(lng, lat):
    """Return the cell number of the point, wrapped in a one-element list.

    The number is ``row_index * column + col_index``, where each index is the
    offset from the module-level origin (``lat0``/``lng0``) divided by the
    cell size (``delta_lat``/``delta_lng``) and truncated by ``int()``.
    """
    row_index = int((lat - lat0) / delta_lat)
    col_index = int((lng - lng0) / delta_lng)
    return [row_index * column + col_index]
# Assign plain lists so the values are matched to rows by position.
# A freshly built DataFrame/Series would carry a 0..k-1 index, whereas
# `unsettled_od` keeps the labels of the rows that survived the range filter;
# pandas aligns on labels, which would attach cell numbers to the wrong trips
# and leave NaN in rows whose label is >= k.
# gridid() returns a one-element list, hence the [0].
unsettled_od['o_grid'] = [
    gridid(lng, lat)[0]
    for lng, lat in zip(unsettled_od['O_lon'], unsettled_od['O_lat'])
]
print('O点已成功编码!')
unsettled_od['d_grid'] = [
    gridid(lng, lat)[0]
    for lng, lat in zip(unsettled_od['D_lon'], unsettled_od['D_lat'])
]
print('D点已成功编码!')
# In[] 小区统计
# Count how many trips start / end in each grid cell.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
O_grid = unsettled_od['o_grid'].value_counts()
D_grid = unsettled_od['d_grid'].value_counts()
# Write the counts into the zone table with a single .loc assignment per
# column. Chained indexing (grid[col][i] = ...) may write to a temporary
# object and does nothing under pandas Copy-on-Write.
# NOTE(review): this assumes the row labels of `grid` are the cell numbers -
# confirm against the exported zone table. Cell numbers that are not present
# in `grid` are skipped rather than appended as new rows.
for counts, target in ((O_grid, 'O_count'), (D_grid, 'D_count')):
    known_cells = counts.index.intersection(grid.index)
    grid.loc[known_cells, target] = counts.loc[known_cells]
#判断方法:(Int((lat2-lat0)/ Δlat))*每行的栅格数+(Int((lng2-lng0)/ Δlng))
#j=0
#for j in unsettled_od.index:
# #O点栅格编号1 D点栅格编号2
# lat1=unsettled_od['O_lat'][j]
# lng1=unsettled_od['O_lon'][j]
# lat2=unsettled_od['D_lat'][j]
# lng2=unsettled_od['D_lon'][j]
# if lat_min<lat1<lat_max and lat_min<lat2<lat_max and lng_min<lng1<lng_max and lng_min<lng2<lng_max:
# unsettled_od['o_grid'][j]=(int((lat1-lat0)/ delta_lat))*column+(int((lng1-lng0)/ delta_lng))
# o_num=unsettled_od['o_grid'][j]
# grid['O_count'][[o_num]]=grid['O_count'][[o_num]]+1
# unsettled_od['d_grid'][j]=(int((lat2-lat0)/ delta_lat))*column+(int((lng2-lng0)/ delta_lng))
# d_num=unsettled_od['d_grid'][j]
# grid['D_count'][[d_num]]=grid['D_count'][[d_num]]+1
# else:
# unsettled_od = unsettled_od[~(unsettled_od.index==j)]#超出范围删除
# Persist the results: the grid-coded OD table and the per-zone statistics.
for frame, destination in (
    (unsettled_od, 'D:/大四上/数据课程设计/4.csv'),
    (grid, 'D:/大四上/数据课程设计/小区统计.csv'),
):
    frame.to_csv(destination)