基于前文,已经通过8864公交获取了长沙市的所有的公交线路名称、公交站台名称
为获取公交站的经纬度坐标奠定了基础
故开始此次代码编写
代码基于高德地图api的web高级服务api功能开展,具体功能可参见高德地图api的开发文档
此次工作开展有两个思路,第一个是按公交线路名称查询,获取整条公交路线的信息。这部分受同站“杨超越luckly”文章的启发而编写,具体内容有些调整,代码如下:
import csv
import time
import pandas as pd
import requests
import json
def opencsv(file):
    """Read a UTF-8 CSV file and return all of its rows.

    The rows are materialised into a list before the file is closed.
    Returning the ``csv.reader`` object itself would hand the caller an
    iterator bound to an already-closed file, which fails on first use.

    Args:
        file: Path of the CSV file holding the saved route information.

    Returns:
        A list of rows; each row is a list of the cell strings.
    """
    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded newlines.
    with open(file, 'r', encoding='utf-8', newline='') as csvfile:
        return list(csv.reader(csvfile))
def getline(name, all_buslines):
    """Look up one bus line by name via the AMap bus-line search API.

    Only the first entry of the returned ``buslines`` array is used.

    Args:
        name: Bus line name, sent as the ``keywords`` query parameter.
        all_buslines: Collection of the lines gathered so far. Only its
            length is read, to give the new row the index ``len + 1``.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``line_name``,
        ``start_stop``, ``end_stop``, ``type`` and ``station_lon_lat``,
        or an empty DataFrame when nothing was found or the request failed.
        Errors are printed, never raised, so a batch run keeps going.
    """
    # API guide: https://lbs.amap.com/api/webservice/guide/api-advanced/bus-inquiry
    GDapi = 'https://restapi.amap.com/v3/bus/linename?'
    params = {
        'extensions': 'all',
        'keywords': name,
        'offset': '20',
        'city': '长沙',
        'key': '你的key',
    }
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(GDapi, params=params, timeout=10)
        response.raise_for_status()
        rt = json.loads(response.text)

        # NOTE(review): AMap web-service responses are expected to carry
        # status "1" on success and an explanatory "info" otherwise (e.g.
        # when the daily quota is used up) - confirm against the API guide.
        if str(rt.get('status')) != '1':
            print('Error fetching bus info for line {}:{}'.format(name, rt.get('info')))
            return pd.DataFrame()

        buslines = rt.get('buslines')
        if not buslines:  # key missing or empty result list
            return pd.DataFrame()

        busline = buslines[0]
        dt = {
            'line_name': busline['name'],
            'start_stop': busline['start_stop'],
            'end_stop': busline['end_stop'],
            'type': busline['type'],
            # The ';'-separated points of the line's polyline, stored as a
            # list of strings.
            'station_lon_lat': busline['polyline'].split(';'),
        }
        return pd.DataFrame([dt], index=[len(all_buslines) + 1])
    except Exception as e:
        # Deliberately broad: this is the per-line best-effort boundary, so
        # one bad response must not abort the remaining lookups.
        print('Error fetching bus info for line {}:{}'.format(name, e))
        return pd.DataFrame()
#def getline2():
def main(file=r'D:\公交站清洗后utf-8.csv', output='长沙公交路线信息.csv'):
    """Fetch route information for every bus line listed in a CSV file.

    The first column of each CSV row is taken as the bus line name. Each
    name is looked up with ``getline`` and the successful results are
    written to ``output`` as one CSV file.

    Args:
        file: Input CSV path; defaults to the original hard-coded location.
        output: Path of the CSV file that receives the collected routes.
    """
    all_buslines = pd.DataFrame()  # accumulates one row per line found

    # utf-8-sig reads files with or without a BOM, so a BOM can never end
    # up glued to the first line name. csv.reader replaces the manual
    # split(',') so quoted cells containing commas are parsed correctly.
    with open(file, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.reader(f):
            bus_name = row[0].strip() if row else ''
            if not bus_name:
                # A blank row would otherwise cost a request for nothing.
                continue

            # Throttle: per the original author's note, AMap allows only
            # one request per second.
            time.sleep(1.2)
            print(bus_name)

            df_line = getline(bus_name, all_buslines)
            if df_line.empty:
                continue

            # Skip the concat for the first hit: concatenating with an
            # empty frame is unnecessary.
            if all_buslines.empty:
                all_buslines = df_line
            else:
                all_buslines = pd.concat([all_buslines, df_line])
            effective_bus_num = len(all_buslines.index)
            print('有效公交线路数为:{}个'.format(effective_bus_num))

    all_buslines.to_csv(output, index=False, encoding='utf-8-sig')
if __name__ == '__main__':
    # Time the whole run. perf_counter is a monotonic clock, so the result
    # is not affected by system clock adjustments during the run.
    start_time = time.perf_counter()
    main()
    end_time = time.perf_counter()
    # End minus start: the reversed subtraction reported a negative time.
    dur = end_time - start_time
    print('总用时:{}s'.format(dur))
同时,基于上述方案,还有第二种方式:按站点名称查询,直接获取每个站点的经纬度,代码如下:
import csv
import time
import pandas as pd
import requests
import json
def getstoplonlat(name, all_stop_info):
    """Look up one bus stop by name via the AMap bus-stop search API.

    Only the first entry of the returned ``busstops`` array is used.

    Args:
        name: Stop name, sent as the ``keywords`` query parameter.
        all_stop_info: Collection of the stops gathered so far. Only its
            length is read, to give the new row the index ``len + 1``.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``line_name`` (the
        stop's name), ``station_lon_lat`` (its ``location`` value),
        ``stop_id`` and ``GO_buslines`` (the entries of its ``buslines``
        list), or an empty DataFrame when nothing was found or the request
        failed. Errors are printed, never raised.
    """
    GDapi = 'https://restapi.amap.com/v3/bus/stopname?'
    params = {
        # Per the original author's note, stop-name queries only work with
        # 'base' here.
        'extensions': 'base',
        'keywords': name,
        'offset': '20',
        'city': '0731',
        'key': '你的key',
    }
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(GDapi, params=params, timeout=10)
        response.raise_for_status()
        rt = json.loads(response.text)
        print(rt)

        busstops = rt.get('busstops')
        if not busstops:  # key missing or empty result list
            return pd.DataFrame()

        busstop = busstops[0]
        print(busstop['buslines'])
        dt = {
            'line_name': busstop['name'],
            'station_lon_lat': busstop['location'],
            # The comma after this entry was missing, which made the whole
            # file a SyntaxError.
            'stop_id': busstop['id'],
            'GO_buslines': list(busstop['buslines']),
        }
        return pd.DataFrame([dt], index=[len(all_stop_info) + 1])
    except Exception as e:
        # Deliberately broad: this is the per-stop best-effort boundary, so
        # one bad response must not abort the remaining lookups.
        print('Error fetching bus info for line {}:{}'.format(name, e))
        return pd.DataFrame()
# Driver: collect every distinct stop name from the CSV, query each one
# once, and save the results.
file = r'D:\公交站清洗后utf-8.csv'
list2 = []
with open(file, 'r', encoding='utf-8', newline='') as csvfile:
    for row in csv.reader(csvfile):
        # Column 0 is skipped (the companion line script reads it as the
        # line name); the remaining cells are treated as stop names.
        list2.extend(cell.strip() for cell in row[1:])

# dict.fromkeys keeps first-seen order and drops duplicates in linear time.
# Names are stripped before de-duplication so "A" and "A " count as one
# stop, and empty cells are discarded.
unique_list = [stop for stop in dict.fromkeys(list2) if stop]
print(len(unique_list))

all_stop_info = pd.DataFrame()
for stop_name in unique_list:
    # Throttle: per the original author's note, AMap allows only one
    # request per second.
    time.sleep(1.2)
    # Query the current stop; the original always passed unique_list[1],
    # so the same stop was requested on every iteration.
    stop_info = getstoplonlat(stop_name, all_stop_info)
    if stop_info.empty:
        continue
    if all_stop_info.empty:
        all_stop_info = stop_info
    else:
        all_stop_info = pd.concat([all_stop_info, stop_info])
    effective_stops_num = len(all_stop_info.index)
    print('有效公交站点数为:{}个'.format(effective_stops_num))

all_stop_info.to_csv('长沙公交站点信息.csv', index=False, encoding='utf-8-sig')
目前该方案经过尝试是可行的,但主要受限于高德地图api的每日配额限制:每天只给予100个配额。
此外,还有一种方案是通过公交ID进行检索,后续将继续探索。