高德爬取多边形范围内POI获取详细信息(包括图片下载)

1.四叉树划分解决每次只能获取200个的限制,参考:高德poi获取之矩形搜索法(冲出900条限制)_检索某一个poi的地点信息 有数量限制不-CSDN博客

2.数据保存为.xlsx,图片保存为.jpg

3.数据说明,空值填null

#基本信息
 0,id    
 1,name    
 2,lon    
 3,lat    
 4,address    
 5,pname# 省,天津无省,所以值为天津市
 6,cityname# 市    
 7,adname# 区

#详细信息
 8, 'typecode'# 兴趣点类型编码,例如:050118,详见高德POI分类文档
 9, 'typeBig'# 大类,中类,小类 e.g.购物服务;专卖店;阿迪达斯
10, 'typeMiddle'
11, 'typeSmall'
12, 'biz_type'  # POI的biz_type字段(原列表遗漏此项,代码中写在typeSmall之后一列)
13, 'telephone'  # POI的电话
14, 'website'  # 网址
15, 'POI_tag'  # 该 POI 的特色内容, 主要出现在美食类 POI 中,代表特色菜例如“烤鱼”
16, 'business_area'  # 所属商圈

#  深度信息:评分、人均消费、是否可订餐、是否可选座、是否可订票、是否可以订房
17, 'Rating'
18, 'AveCost_perPerson'
19, 'meal_ordering' # 以下四个字段基本都为null
20, 'seat_ordering'
21, 'ticket_ordering'
22, 'hotel_ordering'

# 照片相关信息:图片介绍, 图片名
23/25/27 , 'title'
24/26/28 , 'image_name' 
title0    image_name0    title2    image_name2    title4    image_name4

#图片命名
图片名 = 基本信息id-序号.jpg (序号 = 0、2、4)
e.g.B0FFHIT32A-0, B0FFHIT32A-2, 两张均为POI点id=B0FFHIT32A 的图片

# -*- coding: utf-8 -*-
import requests
import json
import openpyxl
import math
import os

# TODO: fill in the Web API key obtained from the AMap (Gaode) developer site.
amap_web_key = '...'

# Search rectangles, one string per rectangle. The main script reads the four
# comma-separated numbers as: left longitude, top latitude, right longitude,
# bottom latitude.
polygon_list = ['116.989, 39.089, 117.131, 39.002']


# POI type codes to crawl, stored in POITypeCode.txt separated by "|".
os.chdir("D:\\GC-WorkDocument\\POIData")
with open('POITypeCode.txt', 'r') as file:
    file_content = file.read()

# Strip surrounding whitespace (e.g. the file's trailing newline) and drop
# empty entries; otherwise the last code would carry a "\n" and never compare
# equal to a POI's typecode, and a blank code would be sent as a type filter.
type_list = [code.strip() for code in str(file_content).split("|") if code.strip()]
# Type code currently being crawled; the main script overwrites it per type.
oneType = '0'

poi_search_url = "http://restapi.amap.com/v3/place/polygon"  # polygon-search endpoint
offset = 25  # page size used for paginated requests


# Ellipsoid parameters used by the GCJ-02 offset formula (Krasovsky 1940).
_GCJ_SEMI_MAJOR_AXIS = 6378245.0
_GCJ_ECCENTRICITY_SQ = 0.00669342162296594323


def _gcj_outside_china(lon, lat):
    """Return True when the point lies outside the box where GCJ-02 applies."""
    return not (73.66 < lon < 135.05 and 3.86 < lat < 53.55)


def _gcj_lat_term(x, y):
    """Raw latitude offset polynomial of the GCJ-02 formula (x=lon-105, y=lat-35)."""
    pi = math.pi
    ret = (-100.0 + 2.0 * x + 3.0 * y + 0.2 * y * y + 0.1 * x * y
           + 0.2 * math.sqrt(math.fabs(x)))
    ret += (20.0 * math.sin(6.0 * x * pi) + 20.0 * math.sin(2.0 * x * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(y * pi) + 40.0 * math.sin(y / 3.0 * pi)) * 2.0 / 3.0
    ret += (160.0 * math.sin(y / 12.0 * pi) + 320.0 * math.sin(y * pi / 30.0)) * 2.0 / 3.0
    return ret


def _gcj_lon_term(x, y):
    """Raw longitude offset polynomial of the GCJ-02 formula (x=lon-105, y=lat-35)."""
    pi = math.pi
    ret = (300.0 + x + 2.0 * y + 0.1 * x * x + 0.1 * x * y
           + 0.1 * math.sqrt(math.fabs(x)))
    ret += (20.0 * math.sin(6.0 * x * pi) + 20.0 * math.sin(2.0 * x * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(x * pi) + 40.0 * math.sin(x / 3.0 * pi)) * 2.0 / 3.0
    ret += (150.0 * math.sin(x / 12.0 * pi) + 300.0 * math.sin(x / 30.0 * pi)) * 2.0 / 3.0
    return ret


def gcj02_to_wgs84(lon, lat):
    """Convert a GCJ-02 coordinate (as returned by AMap) to WGS-84.

    The GCJ-02 offset is evaluated at the input point and subtracted once.
    This single-step inverse is an approximation (typically accurate to a
    few metres), not an exact inversion.

    :param lon: GCJ-02 longitude in degrees
    :param lat: GCJ-02 latitude in degrees
    :return: tuple ``(lon, lat)`` in WGS-84; the input is returned unchanged
        when the point lies outside the bounding box where GCJ-02 applies
    """
    if _gcj_outside_china(lon, lat):
        return lon, lat

    d_lat = _gcj_lat_term(lon - 105.0, lat - 35.0)
    d_lon = _gcj_lon_term(lon - 105.0, lat - 35.0)

    rad_lat = lat / 180.0 * math.pi
    magic = 1 - _GCJ_ECCENTRICITY_SQ * math.sin(rad_lat) ** 2
    sqrt_magic = math.sqrt(magic)

    # Convert the metre-scale offsets into degrees on the ellipsoid.
    d_lat = (d_lat * 180.0) / (
        (_GCJ_SEMI_MAJOR_AXIS * (1 - _GCJ_ECCENTRICITY_SQ)) / (magic * sqrt_magic) * math.pi)
    d_lon = (d_lon * 180.0) / (
        _GCJ_SEMI_MAJOR_AXIS / sqrt_magic * math.cos(rad_lat) * math.pi)

    return lon - d_lon, lat - d_lat


# Fetch every POI inside one rectangle by walking the result pages.
def getpois(polygon, type_list):
    """Collect all POIs that the polygon search returns for one region.

    Pages are requested one after another through ``getpoi_page`` until a
    page comes back with fewer than ``offset`` entries, or until the API
    reports a status other than '1' (in which case whatever was collected
    so far is returned).

    :param polygon: polygon string passed through to ``getpoi_page``
    :param type_list: type filter passed through to ``getpoi_page``
    :return: list of POI dicts gathered from all fetched pages
    """
    collected = []
    page_no = 1
    while True:
        payload = json.loads(getpoi_page(polygon, page_no, type_list))

        # Any status other than '1' is treated as an API error.
        if payload['status'] != '1':
            print("get data wrong cause:", payload)
            break

        page_pois = payload['pois']
        collected.extend(page_pois)
        # A short page means there is nothing left to fetch.
        if len(page_pois) < offset:
            break
        page_no += 1

    total_text = str(len(collected))
    print("current type: " + oneType + " has this number of POI: ", total_text)

    return collected


# Fetch a single result page of the polygon search.
def getpoi_page(polygon, page, type_list):
    """Request one page of POIs from the AMap polygon-search endpoint.

    The query string is built by ``requests`` from a parameter dict so that
    separators such as "|" and "," are percent-encoded consistently, and a
    timeout is set so a stalled connection cannot hang the crawl forever.

    :param polygon: polygon string for the ``polygon`` query parameter
    :param page: 1-based page number
    :param type_list: value for the ``types`` query parameter
    :return: raw response body as text (JSON, parsed by the caller)
    :raises requests.RequestException: on connection errors or timeout
    """
    query = {
        'key': amap_web_key,
        'extensions': 'all',
        'polygon': polygon,
        'offset': str(offset),
        'types': type_list,
        'page': str(page),
        'output': 'json',
    }
    with requests.get(poi_search_url, params=query, timeout=30) as response:
        return response.text


# Download one image to disk.
def download_image(image_url, save_path):
    """Download ``image_url`` and store the bytes at ``save_path``.

    Network failures (including timeouts) are reported as a failed download
    instead of propagating, so one unreachable image does not abort the
    whole crawl.

    :param image_url: URL of the image to fetch
    :param save_path: destination file path; its parent folder is created
        when it does not exist yet
    :return: 1 when the image was saved, 0 when the download failed
    """
    try:
        response = requests.get(image_url, timeout=30)
    except requests.RequestException:
        print("Failed to download image.")
        return 0

    # Only an HTTP 200 response counts as success.
    if response.status_code != 200:
        print("Failed to download image.")
        return 0

    save_folder = os.path.dirname(save_path)
    # dirname is '' for a bare filename; makedirs('') would raise.
    if save_folder:
        os.makedirs(save_folder, exist_ok=True)

    with open(save_path, 'wb') as file:
        file.write(response.content)
    return 1


def _cell_value(value):
    """Return ``value`` if it is a truthy string or number, else 'null'.

    Missing keys, empty strings and non-scalar placeholders (the original
    address handling suggests the API can return non-string values for empty
    fields) all map to the literal string 'null' used throughout the sheet.
    """
    if value and isinstance(value, (str, int, float)):
        return value
    return 'null'


def _type_levels(type_text, position):
    """Split a POI ``type`` string into (big, middle, small) category names.

    ``type_text`` looks like "big;middle;small"; several such entries may be
    joined with "|" in the same way ``typecode`` is. The entry at
    ``position`` is used when it exists, otherwise the first one. Missing
    levels are filled with 'null'.
    """
    entries = str(type_text or '').split('|')
    entry = entries[position] if position < len(entries) else entries[0]
    levels = [level or 'null' for level in entry.split(';')[:3]]
    levels.extend(['null'] * (3 - len(levels)))
    return tuple(levels)


# Append POI rows to the Excel workbook and download their photos.
def write_to_excel(poilist,
                   xlsx_path='D:\\GC-WorkDocument\\POIData\\POIDataTJ\\POI.xlsx',
                   image_dir="D:\\GC-WorkDocument\\POIData\\POIDataTJ\\images"):
    """Append the POIs whose typecode matches ``oneType`` to the workbook.

    Each written row holds: columns 1-8 basic info (id, name, lon, lat,
    address, pname, cityname, adname), 9-12 type code and the three category
    levels, 13-17 biz_type, tel, website, tag, business_area, 18-23 the
    ``biz_ext`` fields, and from column 24 on alternating photo title /
    image file name pairs. Empty values are written as the string 'null'.
    Photos are saved as ``<id>-<n>.jpg`` with n = 0, 2, 4, ...

    :param poilist: list of POI dicts as returned by the polygon search
    :param xlsx_path: existing workbook to append to (it is loaded, extended
        after its last row, and saved back to the same path)
    :param image_dir: folder that receives the downloaded photos
    """
    workbook = openpyxl.load_workbook(xlsx_path)
    sheet = workbook.active

    # First free row after the existing content.
    row = sheet.max_row + 1

    for poi in poilist:
        codes = str(poi.get('typecode') or '').split("|")
        # Keep only POIs that carry the type code currently being crawled.
        if oneType not in codes:
            continue
        type_big, type_middle, type_small = _type_levels(
            poi.get('type'), codes.index(oneType))

        poi_id = poi['id']
        location = str(poi['location']).split(",")
        # AMap coordinates are passed through the GCJ-02 -> WGS-84 converter.
        lon, lat = gcj02_to_wgs84(float(location[0]), float(location[1]))

        # biz_ext: rating, cost per person and the four "ordering" flags.
        biz_ext = poi.get('biz_ext')
        if not isinstance(biz_ext, dict):
            biz_ext = {}
        meal_ordering = biz_ext.get('meal_ordering')
        if meal_ordering == '0':
            # A '0' meal_ordering flag is recorded as 'null'.
            meal_ordering = None

        values = [
            poi_id,
            _cell_value(poi.get('name')),
            lon,
            lat,
            _cell_value(poi.get('address')),
            _cell_value(poi.get('pname')),
            _cell_value(poi.get('cityname')),
            _cell_value(poi.get('adname')),
            oneType,
            type_big,
            type_middle,
            type_small,
            _cell_value(poi.get('biz_type')),
            _cell_value(poi.get('tel')),
            _cell_value(poi.get('website')),
            _cell_value(poi.get('tag')),
            _cell_value(poi.get('business_area')),
            _cell_value(biz_ext.get('rating')),
            _cell_value(biz_ext.get('cost')),
            _cell_value(meal_ordering),
            _cell_value(biz_ext.get('seat_ordering')),
            _cell_value(biz_ext.get('ticket_ordering')),
            _cell_value(biz_ext.get('hotel_ordering')),
        ]
        for col, value in enumerate(values, start=1):
            sheet.cell(row, col, value)

        # Photo columns default to 'null' and are overwritten below.
        for col in range(24, 30):
            sheet.cell(row, col, 'null')

        for slot, photo in enumerate(poi.get('photos') or []):
            shift = 2 * slot  # each photo occupies a (title, image name) pair
            title = photo.get('title')
            if isinstance(title, str) and title:
                sheet.cell(row, 24 + shift, title)
            url = photo.get('url')
            if url:
                image_name = '{}-{}.jpg'.format(poi_id, shift)
                save_path = os.path.join(image_dir, image_name)
                if download_image(url, save_path):
                    sheet.cell(row, 25 + shift, image_name)
                else:
                    # A failed download clears both cells of the pair.
                    sheet.cell(row, 24 + shift, 'null')
                    sheet.cell(row, 25 + shift, 'null')

        row += 1

    workbook.save(xlsx_path)
    print("write done!")


def Quadrangle(key, polygon):
    """
    Crawl each rectangle, splitting it into four quadrants while it is dense.

    A rectangle whose POI count (for the current ``oneType``) is below 200 is
    written to Excel directly; otherwise it is divided at its midpoint and
    the four quadrants are processed recursively. Each rectangle's quadrants
    are kept in their own list, so quadrants of a sibling rectangle are not
    crawled and written a second time.

    :param key: AMap API key (accepted for compatibility; the request helper
        reads the module-level key itself)
    :param polygon: list of rectangles, each given as
        ``[min_lat, max_lon, max_lat, min_lon]``
    :return: None
    """
    # Below this span (degrees) a rectangle is not split again; coordinates
    # are rounded to 6 decimals, so further halving would recurse forever.
    min_span = 1e-5

    for rect in polygon:
        min_lat = round(rect[0], 6)
        max_lon = round(rect[1], 6)
        max_lat = round(rect[2], 6)
        min_lon = round(rect[3], 6)

        # AMap rectangle format: "lon,lat|lon,lat" for the top-left and
        # bottom-right corners (lon/lat separated by ",", pairs by "|").
        polygonStr = '|'.join([
            format(min_lon, '.6f') + ',' + format(max_lat, '.6f'),
            format(max_lon, '.6f') + ',' + format(min_lat, '.6f'),
        ])
        polygon_poi_list = getpois(polygonStr, oneType)

        if len(polygon_poi_list) < 200:
            if len(polygon_poi_list) != 0:
                print('该区域poi数量小于200大于0,正在写入数据')
                print('the number of POI that get:', len(polygon_poi_list))
                write_to_excel(polygon_poi_list)
            continue

        if (max_lon - min_lon) <= min_span and (max_lat - min_lat) <= min_span:
            # Still dense but too small to subdivide: keep what was fetched.
            print('region too small to split further, writing:', len(polygon_poi_list))
            write_to_excel(polygon_poi_list)
            continue

        mid_lat = (max_lat + min_lat) / 2
        mid_lon = (max_lon + min_lon) / 2
        # The four quadrants, in the same [min_lat, max_lon, max_lat, min_lon]
        # layout as the input rectangles.
        quadrants = [
            [min_lat, max_lon, mid_lat, mid_lon],
            [mid_lat, max_lon, max_lat, mid_lon],
            [min_lat, mid_lon, mid_lat, min_lon],
            [mid_lat, mid_lon, max_lat, min_lon],
        ]
        Quadrangle(key, quadrants)


if __name__ == "__main__":
    # Script entry: crawl every configured rectangle once per POI type code.
    print('开始爬取...')

    for polygonStr in polygon_list:
        print('current polygon:', polygonStr)
        # e.g. '116.98904536414594, 39.0897684272285, 117.02446844593, 39.06782334959452'
        lonlatList = [float(piece) for piece in polygonStr.split(",")]
        # Quadrangle expects the four numbers in reverse order.
        polygon = [[lonlatList[3], lonlatList[2], lonlatList[1], lonlatList[0]]]
        for current_type in type_list:
            # Publish the type being crawled through the module-level name
            # read by the crawl helpers.
            oneType = current_type
            Quadrangle(amap_web_key, polygon)

    print(r'写入成功')

  • 5
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值