Python 按经纬度范围爬取高德地图POI

        思路:把一个大区域按经纬度划分为若干网格,使每个网格内的POI数量小于100*20,再逐个网格通过高德地图API爬取POI。可调参数包括爬取的经纬度范围,以及沿经度、纬度方向划分的网格数。使用前最好先了解高德地图API的基本用法;下面直接上代码,原理不再赘述:

import random

import requests
import pandas as pd
import json
import numpy as np
import time



# Request template for the AMap v5 polygon-search endpoint.
# Placeholders: {0} = polygon string, {1} = page_size, {2} = page_num, {3} = API key.
url="https://restapi.amap.com/v5/place/polygon?key={3}&polygon={0}&page_size={1}&page_num={2}"

# Output path; the result is saved in xlsx format.
savePath="BeiJingSelect_Pois_.xlsx"

# Fill in your own AMap Web API keys here (one is picked at random per request).
key_list=['高德地图webAPI密钥1','高德地图webAPI密钥2','高德地图webAPI密钥3']

# Longitude (X) / latitude (Y) range to crawl.
Xmin=116.215275
Ymin=39.762179
Xmax=116.575401
Ymax=40.057342

# Split the range into a 10*10 grid; adjust to the size of the crawled area.
x_partion_num=10
y_partion_num=10



# The bare string below is a no-op expression used as a comment. It says:
# high-frequency access opens a large number of connections and puts pressure
# on the server, so some measures are taken to reduce that pressure.
'''高频访问会建立造成大量连接,对服务器造成压力,因此需要进行一些操作来减少压力'''
# Intended to raise the number of reconnection attempts.
# NOTE(review): requests does not appear to read a top-level DEFAULT_RETRIES
# attribute, so this assignment probably has no effect; retries are normally
# configured through an HTTPAdapter(max_retries=...) mounted on a Session — confirm.
requests.DEFAULT_RETRIES = 5


def get_gridRegion_list(Xmin,Ymin,Xmax,Ymax,x_partion_num,y_partion_num):
    """Split a rectangular extent into a regular grid of cells.

    :param Xmin: lower bound of the extent on the x axis
    :param Ymin: lower bound of the extent on the y axis
    :param Xmax: upper bound of the extent on the x axis
    :param Ymax: upper bound of the extent on the y axis
    :param x_partion_num: number of cells along the x axis
    :param y_partion_num: number of cells along the y axis
    :return: list of cells, each ``[x_low, y_low, x_high, y_high]``, ordered
        with the x index as the outer loop and the y index as the inner loop
    """
    step_x = (Xmax - Xmin) / x_partion_num
    step_y = (Ymax - Ymin) / y_partion_num
    return [
        [
            Xmin + col * step_x,
            Ymin + row * step_y,
            Xmin + (col + 1) * step_x,
            Ymin + (row + 1) * step_y,
        ]
        for col in range(x_partion_num)
        for row in range(y_partion_num)
    ]


def _fetch_poi_page(polygon, page, order, attempts=3):
    """Download one result page and return its decoded JSON body, or ``None``.

    An attempt counts as failed when the request raises a ``requests``
    exception (timeout, connection error), answers with a non-200 status,
    carries a body that is not valid JSON, or lacks the ``count`` field the
    caller relies on. Failed attempts are retried up to ``attempts`` times,
    each one with a key freshly picked from ``key_list``.
    """
    # With many requests in flight, avoid persistent connections.
    headers = {'Connection': 'close'}
    for _ in range(attempts):
        payload = None
        try:
            response = requests.get(
                url.format(polygon, 25, page, random.choice(key_list)),
                headers=headers,
                # (connect timeout, read timeout) in seconds.
                # Pass verify=False here if SSL verification must be skipped.
                timeout=(3, 7),
            )
            if response.status_code == 200:
                payload = json.loads(response.text)
        except (requests.RequestException, ValueError):
            # Network failure or undecodable body: treated like any other
            # failed attempt and reported below.
            payload = None
        if isinstance(payload, dict) and 'count' in payload:
            return payload
        print('第{}个格网区域访问错误'.format(order))
        time.sleep(1)
    return None


def _poi_to_row(poi):
    """Flatten one POI record into ``[id, name, x, y, type, cityname]``.

    Returns ``None`` when ``location`` is missing or is not an ``"x,y"``
    string of two numbers, so one bad record cannot abort the crawl.
    """
    try:
        x_text, y_text = poi['location'].split(',')[:2]
        x, y = float(x_text), float(y_text)
    except (AttributeError, KeyError, ValueError):
        return None
    return [poi['id'], poi['name'], x, y, poi['type'], poi['cityname']]


# The original annotation ``list or tuple`` evaluated to plain ``list``;
# a string annotation states the intent without needing a newer Python.
def get_A_grid_poi(grid_region: 'list | tuple', order):
    """Collect the POIs of one grid cell from the polygon-search endpoint.

    Pages are requested one after another (25 records per page, at most 100
    pages) until a page reports a ``count`` of zero.

    :param grid_region: cell as ``[x_low, y_low, x_high, y_high]``; sent to
        the API as the polygon ``"x_low,y_low|x_high,y_high"``
    :param order: 1-based number of the cell, used only in progress messages
    :return: list of rows ``[id, name, x, y, type, cityname]`` where x and y
        are floats parsed from the record's ``location`` string (presumably
        longitude and latitude — confirm against the API documentation)
    """
    polygon = '{0},{1}|{2},{3}'.format(
        str(grid_region[0]), str(grid_region[1]),
        str(grid_region[2]), str(grid_region[3]))
    poi_list = []
    # Page numbers start at 1; the original started at 0, which is outside
    # the documented range of the v5 API.
    for page in range(1, 101):
        text = _fetch_poi_page(polygon, page, order)
        if text is None:
            # Every attempt for this page failed (already reported). Later
            # pages are unlikely to fare better, so keep what was collected
            # and move on to the next cell instead of crashing the crawl.
            break
        if int(text['count']) == 0:
            print('第{0}个格网{1}页结束'.format(order, page))
            if page <= 2:
                # The cell was exhausted within its first two requests;
                # pause so sparse cells do not cause a burst of requests.
                time.sleep(5)
            return poi_list
        for poi in text['pois']:
            row = _poi_to_row(poi)
            if row is not None:
                poi_list.append(row)
        time.sleep(1)
    return poi_list

def get_grids_pois(Xmin,Ymin,Xmax,Ymax,x_partion_num,y_partion_num):
    """Crawl every grid cell of the given extent and merge the results.

    :param Xmin: lower bound of the extent on the x axis
    :param Ymin: lower bound of the extent on the y axis
    :param Xmax: upper bound of the extent on the x axis
    :param Ymax: upper bound of the extent on the y axis
    :param x_partion_num: number of cells along the x axis
    :param y_partion_num: number of cells along the y axis
    :return: list whose first element is the column-name row
        ``['id', 'name', 'x', 'y', 'type', 'cityname']`` followed by one row
        per POI, in cell order
    """
    header = ['id', 'name', 'x', 'y', 'type', 'cityname']
    table = [header]
    cells = get_gridRegion_list(Xmin,Ymin,Xmax,Ymax,x_partion_num,y_partion_num)
    # Cells are numbered from 1 for the progress messages.
    for number, cell in enumerate(cells, start=1):
        table.extend(get_A_grid_poi(cell, number))
    return table

def save_as_excel(all_pois):
    """Write the collected POI table to ``savePath`` as an Excel workbook.

    :param all_pois: list whose first element is the list of column names and
        whose remaining elements are the data rows
    """
    # Build the frame directly from the row lists. Routing the rows through
    # np.array coerces the mixed str/float values to a single string dtype,
    # so the x/y columns end up as text in the workbook; an empty result also
    # becomes a 1-D empty array that does not match the column names.
    df = pd.DataFrame(data=all_pois[1:], columns=all_pois[0])
    df.to_excel(excel_writer=savePath)



if __name__=='__main__':
    # Crawl every cell of the configured extent, then export the merged table.
    save_as_excel(get_grids_pois(Xmin,Ymin,Xmax,Ymax,x_partion_num,y_partion_num))

  • 0
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值