# Read the bus start/terminal station information.
import pandas as pd
df = pd.read_csv('start_terminal.csv')
# Longitude/latitude lookup (经纬度查询)
# Lookup page: http://api.map.baidu.com/lbsapi/getpoint/index.html
import datetime
import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# HTTP request headers for the Baidu coordinate-picker service.
# The Host header must carry only the host name (optionally with a port),
# never a full URL with scheme and path.
headers = {
    'Host': 'api.map.baidu.com',
    'Connection': 'keep-alive',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}
# The lookup page relies on dynamic interaction, so a real browser is driven
# through webdriver to simulate typing and clicking.
chrome_options = Options()
# `options=` is the supported keyword; the older `chrome_options=` spelling is
# deprecated and no longer accepted by current Selenium releases.
browser = webdriver.Chrome(options=chrome_options)
url = 'http://api.map.baidu.com/lbsapi/getpoint/index.html'
browser.get(url)
# First row to process; earlier rows are skipped.
# NOTE(review): presumably a resume point from an earlier, interrupted run --
# confirm before re-running on a fresh CSV.
START_ROW = 229
# Appended to every station name that is typed into the search box.
CITY_SUFFIX = ' 杭州'
# Seconds to wait after clicking search so the result panel can refresh.
PAGE_LOAD_WAIT_S = 1.5

# First line of the result panel that carries a coordinate entry.
_COORD_LINE_RE = re.compile(r'坐标.*')
# An "x,y" pair of decimal numbers; the dots are escaped so only a literal
# decimal point is accepted.
_XY_PAIR_RE = re.compile(r'\d+\.\d+,\d+\.\d+')


def _extract_coordinates(panel_text):
    """Return the first "x,y" pair found in the result panel text.

    Returns None when the text has no coordinate line or the line holds no
    decimal pair, so callers can skip the row instead of raising.
    """
    line = _COORD_LINE_RE.search(panel_text)
    if line is None:
        return None
    pair = _XY_PAIR_RE.search(line.group())
    return pair.group() if pair else None


def _lookup_coordinates(station):
    """Search the open lookup page for `station` and return its "x,y" text.

    Uses the module-level `browser`. Returns None when the result panel shows
    no coordinates. Browser errors propagate to the caller.
    """
    search_box = browser.find_element(By.ID, 'localvalue')
    # Clear whatever the previous search left in the box.
    search_box.clear()
    search_box.send_keys(station + CITY_SUFFIX)
    browser.find_element(By.ID, 'localsearch').click()
    time.sleep(PAGE_LOAD_WAIT_S)
    return _extract_coordinates(browser.find_element(By.ID, 'MapInfo').text)


for i in range(START_ROW, len(df)):
    # Start station first, then terminal station, each looked up on its own
    # so a failure for one does not prevent the other.
    for name_col, xy_col in (('start', 'start_xy'), ('terminal', 'terminal_xy')):
        try:
            coords = _lookup_coordinates(df.loc[i, name_col])
        except Exception as exc:
            # Best effort: report the failure and move on. Unlike a bare
            # `except`, this still lets Ctrl-C stop the scrape.
            print('row %d: %s lookup failed: %r' % (i, name_col, exc))
            continue
        if coords is not None:
            # Store the plain "x,y" string (not a list) so it can be split
            # on ',' later.
            df.loc[i, xy_col] = coords
    print(i)
# Coordinate processing: split the "x,y" text columns into numeric columns.


def _parse_xy(value):
    """Parse an "x,y" string into a (float, float) tuple.

    A non-empty list or tuple is reduced to its first element before parsing.
    Anything that cannot be parsed (missing value, wrong type, fewer than two
    comma-separated parts, non-numeric text) yields (nan, nan).
    """
    missing = (float('nan'), float('nan'))
    if isinstance(value, (list, tuple)):
        value = value[0] if value else None
    if not isinstance(value, str):
        return missing
    parts = value.split(',')
    try:
        return (float(parts[0]), float(parts[1]))
    except (IndexError, ValueError):
        return missing


# Each source column is handled independently, so a missing start coordinate
# no longer prevents the terminal coordinate of the same row from being parsed.
for _prefix in ('start', 'terminal'):
    _source = _prefix + '_xy'
    if _source not in df.columns:
        # Nothing was scraped into this column; leave the frame untouched.
        print('column %r not found; skipping' % _source)
        continue
    _pairs = [_parse_xy(_value) for _value in df[_source]]
    # Positional assignment works regardless of the frame's index labels.
    df[_prefix + '_x'] = [_pair[0] for _pair in _pairs]
    df[_prefix + '_y'] = [_pair[1] for _pair in _pairs]
# Visualization (可视化)
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later removed the old names, so use whichever spelling this installation
# provides and fall back to the default style if neither is available.
_whitegrid_style = next(
    (name for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
     if name in plt.style.available),
    'default',
)
plt.style.use(_whitegrid_style)
# Scatter the start and terminal station coordinates over a background map.
def plot_on_map(df, BB, map_, s=10, alpha=0.2):
    """Draw two side-by-side panels: start stations and terminal stations.

    df    -- object exposing start_x, start_y, terminal_x and terminal_y.
    BB    -- (x_min, x_max, y_min, y_max); used both as the axis limits and
             as the extent of the background image.
    map_  -- image data passed to imshow as the background.
    s     -- marker size for the scatter points.
    alpha -- marker transparency for the scatter points.

    Draws on a newly created figure and returns nothing.
    """
    _, axs = plt.subplots(1, 2, figsize=(30,20))
    panels = (
        ('start_x', 'start_y', 'start locations'),
        ('terminal_x', 'terminal_y', 'terminal locations'),
    )
    for ax, (x_name, y_name, title) in zip(axs, panels):
        xs = getattr(df, x_name)
        ys = getattr(df, y_name)
        # Points are drawn above (zorder=1) the map image (zorder=0).
        ax.scatter(xs, ys, zorder=1, alpha=alpha, c='r', s=s)
        ax.set_xlim((BB[0], BB[1]))
        ax.set_ylim((BB[2], BB[3]))
        ax.set_title(title)
        ax.imshow(map_, zorder=0, extent=BB)
# BB holds the longitude/latitude coordinates of the background image, in the
# order plot_on_map expects: (x_min, x_max, y_min, y_max).
BB = (119.710941, 120.673801, 29.685506, 30.552774)
# Load the background map image.
map_ = plt.imread('loc.png')
# Plot the station coordinates held in `df`; the original passed `d`, a name
# that is not defined anywhere in this script.
plot_on_map(df, BB, map_, s=1, alpha=0.3)
plt.savefig('station.png')