python 实现杭州所有公交始发、终止站点经纬度爬取匹配(代码正确,注释完整)

数据来源

# Load the bus start/terminal station information.
# The code further down reads the 'start' and 'terminal' columns of this
# table and addresses rows by integer label (0 .. len(df) - 1).
import pandas as pd
df = pd.read_csv('start_terminal.csv')

在这里插入图片描述

经纬度查询

查询网址:http://api.map.baidu.com/lbsapi/getpoint/index.html

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import datetime
import time
from bs4 import BeautifulSoup
import re

# HTTP request headers imitating a desktop Chrome browser.
# NOTE(review): nothing in the visible code passes this dict to a request
# (the scraping below drives a real browser) -- confirm whether it is needed.
headers = {
    # The Host header carries only the host name, never a scheme or a path.
    'Host': 'api.map.baidu.com',
    'Connection': 'keep-alive',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}

# The coordinate picker page is driven by JavaScript, so a real browser is
# automated with Selenium instead of issuing plain HTTP requests.
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By

# NOTE(review): the loop starts at row 229, which looks like a resume point
# after an interrupted run -- set this to 0 to look up every row.
START_ROW = 229

# Compiled once, outside the loop. The first pattern grabs the first line of
# the result panel that mentions the coordinate label; the second pulls the
# "x,y" decimal pair out of that line (dots escaped so they match literally).
_COORD_LINE = re.compile(r'坐标.*')
_XY_PAIR = re.compile(r'\d+\.\d+,\d+\.\d+')


def _extract_xy(panel_text):
    """Return the first 'x,y' coordinate string in *panel_text*, or None."""
    line = _COORD_LINE.search(panel_text)
    if line is None:
        return None
    pair = _XY_PAIR.search(line.group())
    return pair.group() if pair else None


def _lookup_xy(browser, station):
    """Search the picker page for *station* and return its 'x,y' string or None."""
    search_box = browser.find_element(By.ID, 'localvalue')
    # Clear the previous query before typing the new one.
    search_box.clear()
    search_box.send_keys(station + ' 杭州')
    browser.find_element(By.ID, 'localsearch').click()
    # Give the page time to render the search results.
    time.sleep(1.5)
    return _extract_xy(browser.find_element(By.ID, 'MapInfo').text)


chrome_options = Options()
browser = webdriver.Chrome(options=chrome_options)
url = 'http://api.map.baidu.com/lbsapi/getpoint/index.html'
try:
    browser.get(url)
    for i in range(START_ROW, len(df)):
        # Same procedure for both ends of the route; a failure on one end
        # must not prevent the lookup of the other.
        for name_col, xy_col in (('start', 'start_xy'), ('terminal', 'terminal_xy')):
            station = df.loc[i, name_col]
            if not isinstance(station, str):
                # Missing station name (e.g. NaN): nothing to search for.
                continue
            try:
                xy = _lookup_xy(browser, station)
            except WebDriverException as exc:
                # Best effort: report the failure and move on to the next lookup.
                print('lookup failed for row', i, name_col, '-', exc)
                continue
            if xy is None:
                print('no coordinates found for row', i, name_col)
                continue
            # Store the plain 'x,y' string; the later parsing step splits it on ','.
            df.loc[i, xy_col] = xy
        # Progress indicator.
        print(i)
finally:
    # Always shut the browser down, even when the run is interrupted.
    browser.quit()

# Coordinate processing: split each scraped 'x,y' string into two float
# columns (start_x/start_y and terminal_x/terminal_y).
def _parse_xy(value):
    """Return (x, y) as floats parsed from an 'x,y' string, or (nan, nan)."""
    missing = (float('nan'), float('nan'))
    # re.findall results may have been stored as-is; accept a non-empty list.
    if isinstance(value, (list, tuple)) and value:
        value = value[0]
    if not isinstance(value, str):
        return missing
    parts = value.split(',')
    if len(parts) < 2:
        return missing
    try:
        return float(parts[0]), float(parts[1])
    except ValueError:
        return missing


# Each end of the route is handled independently, so a missing start
# coordinate no longer discards a valid terminal coordinate (or vice versa).
for _prefix in ('start', 'terminal'):
    _source = _prefix + '_xy'
    if _source in df.columns:
        _pairs = [_parse_xy(v) for v in df[_source]]
    else:
        # Nothing was scraped for this end: fill the columns with NaN.
        _pairs = [_parse_xy(None)] * len(df)
    df[_prefix + '_x'] = [p[0] for p in _pairs]
    df[_prefix + '_y'] = [p[1] for p in _pairs]

可视化

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# 'seaborn-whitegrid' was renamed to 'seaborn-v0_8-whitegrid' in Matplotlib 3.6
# and the old name was later removed, so use whichever this install provides.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

# Helper that scatters station coordinates over a background map image.
def plot_on_map(df, BB, map_, s=10, alpha=0.2):
    """Draw start and terminal locations side by side on top of *map_*.

    df    -- table exposing start_x, start_y, terminal_x and terminal_y.
    BB    -- 4-tuple used as (x min, x max, y min, y max) for the axis
             limits and as the image extent.
    map_  -- image array drawn underneath the points.
    s     -- marker size forwarded to scatter.
    alpha -- marker transparency forwarded to scatter.
    """
    fig, axs = plt.subplots(1, 2, figsize=(30,20))
    panels = (('start', 'start locations'), ('terminal', 'terminal locations'))
    for ax, (prefix, title) in zip(axs, panels):
        xs = getattr(df, prefix + '_x')
        ys = getattr(df, prefix + '_y')
        # Points go above the map (zorder 1 vs. 0).
        ax.scatter(xs, ys, zorder=1, alpha=alpha, c='r', s=s)
        ax.set_xlim((BB[0], BB[1]))
        ax.set_ylim((BB[2], BB[3]))
        ax.set_title(title)
        ax.imshow(map_, zorder=0, extent=BB)
    
# Load the background map image for the Hangzhou area.
# BB is the coordinate extent of that image, given as
# (x min, x max, y min, y max); plot_on_map uses it for the axis limits
# and as the image extent.
BB = (119.710941, 120.673801, 29.685506, 30.552774)
map_ = plt.imread('loc.png')
# Plot the station coordinates on the map. The table is `df`; the original
# passed an undefined name `d`, which raised NameError.
plot_on_map(df, BB, map_, s=1, alpha=0.3)
plt.savefig('station.png')

在这里插入图片描述

  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值