一、Requests爬取数据
1.获取AK
进入【高德地图开放平台】→注册认证成为开发者→【应用管理】→【我的应用】→【创建新应用】→获得AK
2.所需爬取数据
研究所需数据主要由两部分构成:
①高德地图开放平台交通态势数据(简称交通态势数据),爬取链接:https://restapi.amap.com/v3/traffic/status/road?
②高德地图西安城市交通详情数据(简称交通详情数据),爬取链接:
https://report.amap.com/detail.do?city=610100
第一部分数据通过高德地图提供的LBS服务API获取,第二部分数据通过requests库动态爬取页面获得。
3.数据展示
①交通态势数据:
②交通详情数据:
4.爬虫代码
①交通态势数据:
import requests
import json
import time
import csv
def get_one_page(name):
    """Fetch the AMap traffic-status payload for one road.

    Args:
        name: Road name sent as the ``name`` query parameter (the request is
            scoped to adcode 610100).

    Returns:
        A ``(payload, name)`` tuple where ``payload`` is the decoded JSON
        body, or ``None`` when the request fails, the body is not valid
        JSON, or the server answers with a status other than 200.
    """
    # The query string is built from ``params``; the endpoint must not carry
    # the "?parameters" placeholder copied from the API documentation.
    url = 'https://restapi.amap.com/v3/traffic/status/road'
    params = {
        'key': '你自己的ak',
        'adcode': '610100',
        'name': name,
    }
    try:
        # A timeout keeps the polling loop from hanging on a stalled socket.
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            return response.json(), name
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts; ValueError
        # covers an undecodable JSON body.
        print('Error', e.args)
    return None
# Road names handed to get_one_page() by the polling loop, one request each.
names = {
    '东二环路',
    '北二环路',
    '二环南路西段',
    '二环南路东段',
    '西二环路',
    '金花北路',
    '大兴西路',
}
def write_to_file(content):
    """Append one traffic record to ``交通态势.csv`` in the working directory.

    A header row is written first whenever the file is new or empty, so the
    resulting CSV is self-describing. The file is encoded as UTF-8 with a
    BOM so the Chinese road names are not subject to the platform's default
    encoding.

    Args:
        content: Mapping keyed by the CSV column names. Missing columns are
            written as empty cells.

    Raises:
        ValueError: If ``content`` contains a key that is not a column name
            (raised by ``csv.DictWriter``).
    """
    fieldnames = [
        'name', 'name_description', 'localtime', 'expedite', 'congested',
        'blocked', 'unknown', 'status', 'description',
    ]
    with open('交通态势.csv', 'a', newline='', encoding='utf-8-sig') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        # In append mode the position starts at the end of the file, so a
        # position of 0 means nothing has been written yet.
        if csvfile.tell() == 0:
            writer.writeheader()
        writer.writerow(content)
def parse_page(html, name):
    """Yield one flat traffic record built from a decoded API response.

    Args:
        html: Decoded JSON payload (a dict) as returned by the traffic-status
            request, or ``None`` when the request failed.
        name: Road name the payload was requested for.

    Yields:
        A single dict with the keys ``name``, ``name_description``,
        ``localtime``, ``expedite``, ``congested``, ``blocked``, ``unknown``,
        ``status`` and ``description``. Nothing is yielded when the payload
        is empty or has no ``trafficinfo`` section. Evaluation fields are
        ``None`` when the ``evaluation`` section is absent.
    """
    item = html.get('trafficinfo') if html else None
    if not item:
        # Failed request or a payload without traffic data: nothing to record.
        return
    # Look the nested section up once; fall back to an empty mapping so the
    # individual field lookups below cannot fail.
    evaluation = item.get('evaluation') or {}
    luduan = {
        'name': name,
        'name_description': item.get('description'),
        # Local wall-clock time at which the record was parsed.
        'localtime': time.strftime('%Y-%m-%d,%H:%M:%S'),
        'expedite': evaluation.get('expedite'),
        'congested': evaluation.get('congested'),
        'blocked': evaluation.get('blocked'),
        'unknown': evaluation.get('unknown'),
        'status': evaluation.get('status'),
        'description': evaluation.get('description'),
    }
    yield luduan
# Poll every configured road, persist whatever could be parsed, then wait
# five minutes before the next sweep.
# NOTE(review): the pasted source lost its indentation; the sleep is assumed
# to follow a full sweep over ``names`` rather than each single road, and the
# separator line is assumed to be printed once per road — confirm.
while True:
    for name in names:
        result = get_one_page(name)
        if result is None:
            # get_one_page() returns None on a failed request; unpacking it
            # directly would raise TypeError and stop the crawler.
            print('Skip', name)
        else:
            r, n = result
            print(r, n)
            info = r.get('trafficinfo') if r else None
            # Only parse payloads that carry the sections parse_page() reads.
            if info and info.get('evaluation'):
                # parse_page() is a generator: it does nothing until it is
                # iterated, so consume it and store each record.
                for record in parse_page(r, n):
                    write_to_file(record)
        print("************************************************************")
    time.sleep(300)
②交通详情数据:
import requests
from urllib.parse import urlencode
from pyquery