课题需要建立路网信息,因此学习了 Python 爬虫,参考了几个现有的爬虫并按需求做了整理,尝试爬取百度地图的道路信息。
代码实现如下:
首先要到百度地图开放平台(http://lbsyun.baidu.com/index.php?title=%E9%A6%96%E9%A1%B5)创建一个 Web 服务类型的应用,获取该应用对应的 ak(访问密钥)。
# -*- coding: utf-8 -*-
import urllib3
import json
import requests
import re
import time
# --- Crawl configuration ---------------------------------------------------
# Search region as two [x, y] corners in the Baidu coordinate system.
# x1, y1, x2, y2 are placeholders: substitute real numeric coordinates
# before running the script.
left_bottom = [x1, y1]  # lower-left corner of the region
right_top = [x2, y2]  # upper-right corner of the region

# The region is divided into a part_n x part_n grid of tiles.
part_n = 4

# Running request counter; the crawl loop uses it to insert longer pauses
# every 20 / 100 / 200 requests.
num = 1

# Send a browser-like User-Agent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
}

# Endpoint prefix; the query string is appended directly after the '?'.
url0 = 'http://api.map.baidu.com/place/v2/search?'

# Width and height of a single grid tile.
x_item = (right_top[0] - left_bottom[0]) / part_n
y_item = (right_top[1] - left_bottom[1]) / part_n

query = '道路'  # search keyword
ak = ''  # access key obtained from the Baidu developer platform
n = 0  # tile counter
# Crawl the region tile by tile.  The bounding box is split into a
# part_n x part_n grid; for every tile the search endpoint is paged through
# one result per request (page_size=1), and each hit is appended to .\x.csv
# as a "name,lng,lat" row.
for i in range(part_n):
    for j in range(part_n):
        # Corners of tile (i, j).  Both corners are measured from the
        # region's lower-left corner so that a tile covers exactly one grid
        # cell.  (Offsetting the upper-right corner from right_top instead
        # makes every tile as large as the whole region and pushes it
        # outside the requested area.)
        left_bottom_part = [left_bottom[0] + i * x_item,
                            left_bottom[1] + j * y_item]
        right_top_part = [left_bottom[0] + (i + 1) * x_item,
                          left_bottom[1] + (j + 1) * y_item]

        # The bounds parameter lists y before x for each corner:
        # lower-left first, then upper-right.
        bounds = ','.join(str(v) for v in (left_bottom_part[1],
                                           left_bottom_part[0],
                                           right_top_part[1],
                                           right_top_part[0]))

        # At most 400 pages are requested per tile.
        # NOTE(review): 400 presumably mirrors the API's cap on results per
        # query -- confirm against the Place API documentation.
        for k in range(400):
            url = (url0 + 'query=' + query
                   + '&page_size=1&page_num=' + str(k)
                   + '&scope=1&bounds=' + bounds
                   + '&output=json&ak=' + ak)
            print(url)

            # A timeout keeps a stalled connection from hanging the crawl.
            response = requests.get(url, headers=headers, timeout=30)
            data = json.loads(response.content)

            # .get() instead of indexing: an error payload may not carry a
            # 'results' (or 'total') field at all.
            results = data.get('results')
            if results:
                poi = results[0]
                loc = poi['location']
                line = '%s,%f,%f' % (poi['name'],
                                     float(loc['lng']),
                                     float(loc['lat']))
                print(line)
                # Append and close per record so that rows already fetched
                # survive an interruption.  No encoding is passed, so the
                # platform default text encoding is used.
                with open('.\\x.csv', 'a') as out:
                    out.write(line + '\n')
            elif data.get('status'):
                # Non-zero status: report the failure instead of crashing
                # on a missing key.
                print('Baidu API error: status=%s, message=%s'
                      % (data.get('status'), data.get('message')))

            # Throttle: 1 s after every request, plus longer pauses every
            # 20, 100 and 200 requests (the pauses add up when several
            # conditions hold at once).
            time.sleep(1)
            if num % 20 == 0:
                time.sleep(2)
            if num % 100 == 0:
                time.sleep(3)
            if num % 200 == 0:
                time.sleep(7)
            num = num + 1

            # An empty page means this tile has no further results, so the
            # remaining pages would only waste requests and sleep time.
            if not results:
                break
        n += 1