1.四叉树划分解决每次只能获取200个的限制,参考:高德poi获取之矩形搜索法(冲出900条限制)_检索某一个poi的地点信息 有数量限制不-CSDN博客
2.数据保存为.xlsx,图片保存为.jpg
3.数据说明,空值填null
#基本信息
0,id
1,name
2,lon
3,lat
4,address
5,pname# 省,天津无省,所以值为天津市
6,cityname# 市
7,adname# 区
#详细信息
8, 'typecode'# 兴趣点类型编码,例如:050118,详见高德POI分类文档
9, 'typeBig'# 大类,中类,小类 e.g.购物服务;专卖店;阿迪达斯
10, 'typeMiddle'
11, 'typeSmall'
12, 'biz_type' # 高德返回的 biz_type 字段,无值填null
13, 'telephone' # POI的电话
14, 'website' # 网址
15, 'POI_tag' # 该 POI 的特色内容, 主要出现在美食类 POI 中,代表特色菜例如“烤鱼”
16, 'business_area' # 所属商圈
# 深度信息:评分、人均消费、是否可订餐、是否可选座、是否可订票、是否可以订房
17, 'Rating'
18, 'AveCost_perPerson'
19, 'meal_ordering' # 以下四个字段基本都为null
20, 'seat_ordering'
21, 'ticket_ordering'
22, 'hotel_ordering'
# 照片相关信息:图片介绍, 图片名
23/25/27 , 'title'
24/26/28 , 'image_name'
title0 image_name0 title2 image_name2 title4 image_name4
#图片命名
图片名 = 基本信息id-序号.jpg (序号 = 0、2、4)
e.g.B0FFHIT32A-0.jpg, B0FFHIT32A-2.jpg, 两张均为POI点id=B0FFHIT32A 的图片
# -*- coding: utf-8 -*-
import requests
import json
import openpyxl
import math
import os
# TODO: fill in the values below before running the crawl.
amap_web_key = '...'  # Web API key issued by the Amap (Gaode) developer console
# Search rectangles, one string per rectangle, written as
# "min lon, max lat, max lon, min lat" (the __main__ block unpacks them in that order).
polygon_list = ['116.989, 39.089, 117.131, 39.002']
# POI type codes are read from a text file in which the codes are separated by "|".
os.chdir("D:\\GC-WorkDocument\\POIData")
with open('POITypeCode.txt', 'r') as file:
    file_content = file.read()
# Strip surrounding whitespace (e.g. the file's trailing newline) and drop empty
# entries: an unstripped code can never equal a POI's typecode, so every POI of
# that type would be silently skipped when writing the workbook.
type_list = [code.strip() for code in file_content.split("|") if code.strip()]
# Type code currently being crawled; '0' means "none selected yet".
oneType = '0'
poi_search_url = "http://restapi.amap.com/v3/place/polygon"  # polygon search endpoint
offset = 25  # page size used when paging through results
def _gcj02_lat_shift(x, y):
    """Return the raw latitude term of the GCJ-02 offset polynomial."""
    shift = (-100.0 + 2.0 * x + 3.0 * y + 0.2 * y * y
             + 0.1 * x * y + 0.2 * math.sqrt(math.fabs(x)))
    shift += (20.0 * math.sin(6.0 * x * math.pi)
              + 20.0 * math.sin(2.0 * x * math.pi)) * 2.0 / 3.0
    shift += (20.0 * math.sin(y * math.pi)
              + 40.0 * math.sin(y / 3.0 * math.pi)) * 2.0 / 3.0
    shift += (160.0 * math.sin(y / 12.0 * math.pi)
              + 320.0 * math.sin(y * math.pi / 30.0)) * 2.0 / 3.0
    return shift


def _gcj02_lon_shift(x, y):
    """Return the raw longitude term of the GCJ-02 offset polynomial."""
    shift = (300.0 + x + 2.0 * y + 0.1 * x * x
             + 0.1 * x * y + 0.1 * math.sqrt(math.fabs(x)))
    shift += (20.0 * math.sin(6.0 * x * math.pi)
              + 20.0 * math.sin(2.0 * x * math.pi)) * 2.0 / 3.0
    shift += (20.0 * math.sin(x * math.pi)
              + 40.0 * math.sin(x / 3.0 * math.pi)) * 2.0 / 3.0
    shift += (150.0 * math.sin(x / 12.0 * math.pi)
              + 300.0 * math.sin(x / 30.0 * math.pi)) * 2.0 / 3.0
    return shift


def gcj02_to_wgs84(lon, lat):
    """Convert a GCJ-02 coordinate to an approximate WGS-84 coordinate.

    The previous body was a placeholder that returned its input unchanged.
    This uses the widely published single-pass inverse: the GCJ-02 offset is
    evaluated at the given point and subtracted from it. The result is an
    approximation, not an exact inverse.

    :param lon: GCJ-02 longitude in degrees
    :param lat: GCJ-02 latitude in degrees
    :return: ``(lon, lat)`` tuple in WGS-84; the input is returned unchanged
        for points outside the bounding box in which GCJ-02 offsets apply
    """
    # GCJ-02 only shifts coordinates inside this bounding box around China.
    if not (73.66 < lon < 135.05 and 3.86 < lat < 53.55):
        return lon, lat

    semi_major = 6378245.0  # semi-major axis of the Krasovsky 1940 ellipsoid
    ecc_sq = 0.00669342162296594323  # its first eccentricity squared

    d_lat = _gcj02_lat_shift(lon - 105.0, lat - 35.0)
    d_lon = _gcj02_lon_shift(lon - 105.0, lat - 35.0)
    rad_lat = lat / 180.0 * math.pi
    magic = 1 - ecc_sq * math.sin(rad_lat) ** 2
    sqrt_magic = math.sqrt(magic)
    # Scale the raw polynomial terms into degrees on the ellipsoid.
    d_lat = (d_lat * 180.0) / ((semi_major * (1 - ecc_sq)) / (magic * sqrt_magic) * math.pi)
    d_lon = (d_lon * 180.0) / (semi_major / sqrt_magic * math.cos(rad_lat) * math.pi)
    # (lon + d_lon, lat + d_lat) is where this point would land if it were WGS-84;
    # mirroring that shift around the input approximates the original WGS-84 point.
    return lon - d_lon, lat - d_lat
# Collect every POI inside one rectangle by paging through the search results.
def getpois(polygon, type_list):
    """Return all POIs that the polygon search yields for *polygon*.

    Pages are requested one after another through ``getpoi_page`` until a
    page comes back with fewer than ``offset`` entries, or until the API
    reports a status other than ``'1'`` (in which case the response is
    printed and whatever was collected so far is returned).

    :param polygon: polygon string forwarded unchanged to ``getpoi_page``
    :param type_list: POI type filter forwarded unchanged to ``getpoi_page``
    :return: list of POI dicts taken from the ``pois`` field of each page
    """
    collected = []
    page_no = 1
    while True:
        payload = json.loads(getpoi_page(polygon, page_no, type_list))
        if payload['status'] != '1':  # any status other than '1' is an API error
            print("get data wrong cause:", payload)
            break
        page_items = payload['pois']
        collected.extend(page_items)
        if len(page_items) < offset:  # a short page means there is nothing left
            break
        page_no += 1
    print("current type: " + oneType + " has this number of POI: ", str(len(collected)))
    return collected
# Fetch a single result page of the polygon search.
def getpoi_page(polygon, page, type_list):
    """Request one page of the polygon POI search and return the raw body.

    The query is passed through ``params`` so that requests URL-encodes the
    reserved characters in the polygon and type strings, and a timeout is set
    so a stalled connection cannot hang the crawl indefinitely.

    :param polygon: polygon string sent as the ``polygon`` query parameter
    :param page: 1-based page number
    :param type_list: POI type filter sent as the ``types`` query parameter
    :return: response body as text (JSON, parsed by the caller)
    :raises requests.exceptions.RequestException: on connection failure or timeout
    """
    query = {
        'key': amap_web_key,
        'extensions': 'all',
        'polygon': polygon,
        'offset': offset,
        'types': type_list,
        'page': page,
        'output': 'json',
    }
    with requests.get(poi_search_url, params=query, timeout=30) as response:
        return response.text
# Download one image to disk.
def download_image(image_url, save_path):
    """Download *image_url* and store it at *save_path*.

    Network errors and timeouts are reported as a failed download (return 0)
    instead of propagating, so one unreachable image does not abort the crawl.

    :param image_url: URL of the image to fetch
    :param save_path: destination file path; its folder is created if missing
    :return: 1 when the image was saved, 0 when the download failed
    """
    try:
        response = requests.get(image_url, timeout=30)
    except requests.RequestException as exc:
        print("Failed to download image.", exc)
        return 0
    if response.status_code != 200:
        print("Failed to download image.")
        return 0
    save_folder = os.path.dirname(save_path)
    # dirname is '' for a bare file name, and makedirs('') would raise.
    if save_folder:
        os.makedirs(save_folder, exist_ok=True)
    with open(save_path, 'wb') as file:
        file.write(response.content)
    return 1
# Write crawled POIs into the Excel workbook.
def _excel_value(value):
    """Return *value* if it is a non-empty scalar, otherwise the 'null' placeholder."""
    if value and isinstance(value, (str, int, float)):
        return value
    return 'null'


def write_to_excel(poilist):
    """Append the POIs of the current type (``oneType``) to POI.xlsx.

    A POI is written only when one of its ``typecode`` entries (split on "|")
    equals ``oneType``. Rows are appended below the sheet's last used row and
    the workbook is saved once at the end. Columns (1-based):

    1 id, 2 name, 3 lon, 4 lat (after ``gcj02_to_wgs84``), 5 address,
    6 pname, 7 cityname, 8 adname, 9 typecode, 10-12 the three ";"-separated
    parts of ``type``, 13 biz_type, 14 tel, 15 website, 16 tag,
    17 business_area, 18 rating, 19 cost, 20 meal_ordering, 21 seat_ordering,
    22 ticket_ordering, 23 hotel_ordering, then one (title, image file name)
    pair per photo starting at column 24.

    Missing, empty or non-scalar values are written as the string 'null'.
    A photo pair is filled in only when the photo has a URL and its download
    succeeded; otherwise both cells stay 'null'.

    :param poilist: list of POI dicts as returned by the search API
    :return: None
    :raises KeyError: if a POI lacks ``id``, ``name`` or ``location``
    """
    workbook_path = 'D:\\GC-WorkDocument\\POIData\\POIDataTJ\\POI.xlsx'
    image_folder = "D:\\GC-WorkDocument\\POIData\\POIDataTJ\\images"
    workbook = openpyxl.load_workbook(workbook_path)
    sheet = workbook.active
    row = sheet.max_row  # last used row; every written POI goes one row below
    for poi in poilist:
        codes = str(poi.get('typecode') or '').split("|")
        # '0' is the "no type selected" sentinel and never counts as a match.
        if oneType == '0' or oneType not in codes:
            continue
        row += 1
        poi_id = poi['id']

        coords = str(poi['location']).split(",")
        # Convert the Amap (GCJ-02) coordinate before storing it.
        lon, lat = gcj02_to_wgs84(float(coords[0]), float(coords[1]))

        # e.g. "购物服务;专卖店;专营店" -> big / middle / small category;
        # pad so that a shorter type string does not raise IndexError.
        type_parts = str(poi.get('type') or '').split(";")
        type_parts += ['null'] * (3 - len(type_parts))

        # The API may deliver an empty list instead of an object here.
        biz_ext = poi.get('biz_ext')
        if not isinstance(biz_ext, dict):
            biz_ext = {}
        meal_ordering = biz_ext.get('meal_ordering')
        if meal_ordering == '0':
            meal_ordering = None  # '0' is stored as 'null' for this field only

        values = [
            poi_id,
            poi['name'],
            lon,
            lat,
            _excel_value(poi.get('address')),
            _excel_value(poi.get('pname')),
            _excel_value(poi.get('cityname')),
            _excel_value(poi.get('adname')),
            oneType,
            _excel_value(type_parts[0]),
            _excel_value(type_parts[1]),
            _excel_value(type_parts[2]),
            _excel_value(poi.get('biz_type')),
            _excel_value(poi.get('tel')),
            _excel_value(poi.get('website')),
            _excel_value(poi.get('tag')),
            _excel_value(poi.get('business_area')),
            _excel_value(biz_ext.get('rating')),
            _excel_value(biz_ext.get('cost')),
            _excel_value(meal_ordering),
            _excel_value(biz_ext.get('seat_ordering')),
            _excel_value(biz_ext.get('ticket_ordering')),
            _excel_value(biz_ext.get('hotel_ordering')),
        ]
        for column, value in enumerate(values, start=1):
            sheet.cell(row, column, value)

        # Photo columns default to 'null' and are overwritten per saved photo.
        for column in range(24, 30):
            sheet.cell(row, column, 'null')
        for photo_no, photo in enumerate(poi.get('photos') or []):
            suffix = 2 * photo_no  # 0, 2, 4: file-name suffix and column offset
            url = photo.get('url')
            if not url:
                continue
            image_name = poi_id + '-' + str(suffix) + '.jpg'
            if not download_image(url, os.path.join(image_folder, image_name)):
                continue
            sheet.cell(row, 24 + suffix, _excel_value(photo.get('title')))
            sheet.cell(row, 25 + suffix, image_name)
    workbook.save(workbook_path)
    print("write done!")
def Quadrangle(key, polygon):
    """Crawl each rectangle, splitting it into quadrants when the result is capped.

    For every rectangle the POIs of the current type (``oneType``) are fetched
    with ``getpois``. Fewer than 200 results are written to the workbook;
    200 or more means the query was probably truncated, so the rectangle is
    cut into four quadrants that are crawled by a recursive call after the
    whole list has been processed.

    :param key: Amap web key (kept for compatibility; not used in this function)
    :param polygon: list of rectangles, each ``[min_lat, max_lon, max_lat, min_lon]``
    :return: None
    """
    # Quadrants that still have to be crawled at the next recursion level.
    pending = []
    for rect in polygon:
        min_lat = round(rect[0], 6)
        max_lon = round(rect[1], 6)
        max_lat = round(rect[2], 6)
        min_lon = round(rect[3], 6)
        # Documented polygon format: "lon,lat|lon,lat" (top-left, then bottom-right).
        polygonStr = '{:.6f},{:.6f}|{:.6f},{:.6f}'.format(min_lon, max_lat, max_lon, min_lat)
        polygon_poi_list = getpois(polygonStr, oneType)
        if len(polygon_poi_list) < 200:
            if len(polygon_poi_list) != 0:
                print('该区域poi数量小于200大于0,正在写入数据')
                print('the number of POI that get:', len(polygon_poi_list))
                write_to_excel(polygon_poi_list)
            continue
        # Coordinates are rounded to 6 decimals, so a rectangle this small can
        # no longer shrink; write what was found instead of recursing forever.
        if min(max_lon - min_lon, max_lat - min_lat) <= 1e-5:
            print('region cannot be split further, writing the capped result:', len(polygon_poi_list))
            write_to_excel(polygon_poi_list)
            continue
        mid_lat = (max_lat + min_lat) / 2
        mid_lon = (max_lon + min_lon) / 2
        pending.extend([
            [min_lat, max_lon, mid_lat, mid_lon],  # south-east quadrant
            [mid_lat, max_lon, max_lat, mid_lon],  # north-east quadrant
            [min_lat, mid_lon, mid_lat, min_lon],  # south-west quadrant
            [mid_lat, mid_lon, max_lat, min_lon],  # north-west quadrant
        ])
    # Recurse only after every rectangle of this level was handled; with
    # nothing left to split the function simply returns to its caller.
    if pending:
        Quadrangle(key, pending)
if __name__ == "__main__":
    # Crawl every configured rectangle once per POI type code.
    print('开始爬取...')
    for rect_text in polygon_list:
        print('current polygon:', rect_text)
        # e.g. '116.98904536414594, 39.0897684272285, 117.02446844593, 39.06782334959452'
        corners = [float(piece) for piece in rect_text.split(",")]
        # Quadrangle expects the four numbers in reverse order.
        polygon = [[corners[3], corners[2], corners[1], corners[0]]]
        for type_code in type_list:
            # oneType is the module-level "current type" read by the other functions.
            oneType = type_code
            Quadrangle(amap_web_key, polygon)
    print(r'写入成功')