百度地图POI爬虫(Python3)

示例一

# -*- coding:utf-8 -*-
import csv
import json
import codecs
import os
import string
import sys
import time
import urllib.request
from urllib.parse import quote
from builtins import object, float, range, int, len, open, list, str


class BaiDuPOI(object):
    def __init__(self, itemy, loc):
        self.itemy = itemy
        self.loc = loc

    def urls(self):
        api_key = baidu_api
        urls = []
        for pages in range(0, 2):
            url = 'http://api.map.baidu.com/place/v2/search?query=' + self.itemy + '&bounds=' + self.loc + '&page_size=20&page_num=' + str(
                pages) + '&output=json&ak=' + api_key
            urls.append(url)
        return urls

    def baidu_search(self):
        '''json_sel = []
        for url in self.urls():
            s = quote(url, safe=string.printable)
            json_obj = urllib.request.urlopen(s).read().decode('utf-8')
            data = json.loads(json_obj)
            for item in data['results']:
                jname = item["name"]
                jlat = item["location"]["lat"]
                jlng = item["location"]["lng"]
                js_sel = jname + ',' + str(jlat) + ',' + str(jlng)
                json_sel.append(js_sel)
        return json_sel
        '''
        data = []
        for url in self.urls():
            s = quote(url, safe=string.printable)
            json_obj = urllib.request.urlopen(s).read().decode('utf-8')
            data.append(json.loads(json_obj))
            time.sleep(1)  # 休眠1秒
        return data


class LocaDiv(object):
    def __init__(self, loc_all):
        self.loc_all = loc_all

    def lat_all(self):
        lat_sw = float(self.loc_all.split(',')[0])
        lat_ne = float(self.loc_all.split(',')[2])
        lat_list = []
        for i in range(0, int((lat_ne - lat_sw + 0.0001) / 0.2)):  # 0.1为网格大小,可更改
            lat_list.append(lat_sw + 0.2 * i)  # 0.05
        lat_list.append(lat_ne)
        return lat_list

    def lng_all(self):
        lng_sw = float(self.loc_all.split(',')[1])
        lng_ne = float(self.loc_all.split(',')[3])
        lng_list = []
        for i in range(0, int((lng_ne - lng_sw + 0.0001) / 0.3)):  # 0.2为网格大小,可更改
            lng_list.append(lng_sw + 0.3 * i)  # 0.2为网格大小,可更改
        lng_list.append(lng_ne)
        return lng_list

    def ls_com(self):
        l1 = self.lat_all()
        l2 = self.lng_all()
        ab_list = []
        for i in range(0, len(l1)):
            a = str(l1[i])
            for i2 in range(0, len(l2)):
                b = str(l2[i2])
                ab = a + ',' + b
                ab_list.append(ab)
        return ab_list

    def ls_row(self):
        l1 = self.lat_all()
        l2 = self.lng_all()
        ls_com_v = self.ls_com()
        ls = []
        for n in range(0, len(l1) - 1):
            for i in range(0 + len(l1) * n, len(l2) + (len(l2)) * n - 1):
                a = ls_com_v[i]
                b = ls_com_v[i + len(l2) + 1]
                ab = a + ',' + b
                ls.append(ab)
        return ls


if __name__ == '__main__':
    doc = open('NTPOI.csv', 'a+')
    writer = csv.writer(doc)

    # ak
    baidu_api = "*********************"  # 这里填入你的百度API的ak
    print("开始爬取数据,请稍等...")
    start_time = time.time()
    loc = LocaDiv('31.69,120.54,32.65,121.95')
    locs_to_use = loc.ls_row()

    for loc_to_use in locs_to_use:
        par = BaiDuPOI('学校', loc_to_use)  # 请修改爬取的类别
        '''
        a = par.baidu_search()
        for ax in a:
            writer.writerow(a)
       '''

        listdata = par.baidu_search()
        for resultIndex in range(len(listdata)):  # 提取返回的结果
            resultlist=listdata[resultIndex]["results"]
            for pIndex in  range(len(resultlist)):
                 writer.writerow(list(resultlist[pIndex].values()))
        # time.sleep(1)  # 休眠1秒

    doc.close()
    end_time = time.time()
    print("学校爬取完毕,用时%.2f秒" % (end_time - start_time))

示例二

# 提取城市的POI点信息并将其保存至CSV
import csv
import string
import urllib
import json
from urllib.parse import quote

left_bottom = [120.89,31.83];  # 设置区域左下角坐标(百度坐标系)
right_top = [121.40,32.47]; # 设置区域右上角坐标(百度坐标系)
part_n = 8;  # 设置区域网格(2*2)
url0 = 'http://api.map.baidu.com/place/v2/search?';
x_item = (right_top[0]-left_bottom[0])/part_n;
y_item = (right_top[1]-left_bottom[1])/part_n;
query = '学校'; #搜索关键词设置
ak = 'OGTkT2pv********************mm9mek'; #百度地图api信令
n = 0; # 切片计数器

datacsv=open("baidu.csv", "a+", encoding="utf-8");
csvwriter = csv.writer(datacsv, dialect=("excel"))

for i in range(part_n):
    for j in range(part_n):
        left_bottom_part = [left_bottom[0]+i*x_item,left_bottom[1]+j*y_item]; # 切片的左下角坐标
        right_top_part = [right_top[0]+i*x_item,right_top[1]+j*y_item]; # 切片的右上角坐标
        for k in range(20):
            url = url0 + 'query=' + query + '&page_size=20&page_num=' + str(k) + '&scope=1&bounds=' + str(left_bottom_part[1]) + ',' + str(left_bottom_part[0]) + ','+str(right_top_part[1]) + ',' + str(right_top_part[0]) + '&output=json&ak=' + ak;
            s=quote(url, safe=string.printable)
            data = urllib.request.urlopen(s);
            hjson = json.loads(data.read().decode('utf-8'));
            if hjson['message'] == 'ok':
                results = hjson['results'];
                for m in range(len(results)): # 提取返回的结果
                    csvwriter.writerow(list(results[m].values()))
        n += 1;
        print('第',str(n),'个切片入库成功')

如图:

在这里插入图片描述
在这里插入图片描述

  • 7
    点赞
  • 20
    收藏
    觉得还不错? 一键收藏
  • 6
    评论
评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值