[爬虫] 爬取POI服务

爬取的数据仅用于科研
高德地图POI服务:http://lbs.amap.com/api/webservice/guide/api/search

结果:爬取厦门的公司企业信息
可获取的数据字段请参考上面的高德地图POI服务文档,其中列出了接口返回的全部数据项

这里写图片描述

coordinate_conversion文件:https://blog.csdn.net/summer_dew/article/details/80723434

# -*- coding:utf-8 -*-
# function: crawl POI data from the AMap (Gaode) POI web service
import xlwt
import urllib
from bs4 import BeautifulSoup
import coordinate_conversion

# POI category to crawl. It is sent as the 'types' request parameter and is
# also used for the worksheet name and the output file name.
TYPE = u"公司企业"
# Destination .xls path; the category name is substituted into the file name.
outPath = r"D:\Users\PasserQi\Desktop\GetAMapPOI\%s.xls" % TYPE

# Worksheet columns, in order. Entries matching a tag name inside a <poi>
# element are copied as-is; 'x' and 'y' are filled from the converted
# <location> value by the main script.
saveField = ["id","name","type","typecode","address","x","y","pname","cityname","business_area","photos"]

AMAP_API_KEY = "4fac3db866dcc3b8a******d3a7db1c7"  # AMap (Gaode) API key
# Query-string parameters for the place/text search request. The main script
# adds a 'page' entry to this dict before each request.
urlParamJson = {
    'city' : '厦门',
    'output' : 'xml',
    'key' : AMAP_API_KEY,
    'types' : TYPE.encode("utf8"),
    'citylimit' : 'true',  # only return data for the specified city
    'offset' : '20'  # number of records per page
}
# Page-number bounds used by the crawl loop in the main script.
MIN_PAGE = 1
MAX_PAGE = 100  # maximum page number


def setStype(name, height, bold=False):
    """Build an xlwt cell style that uses the requested font.

    Args:
        name: Font face name.
        height: Font height, assigned to ``font.height`` unchanged
            (xlwt expresses font height in 1/20 of a point).
        bold: Whether the font is bold.

    Returns:
        An ``xlwt.XFStyle`` whose font carries the settings above and
        colour index 4.
    """
    style = xlwt.XFStyle()  # start from xlwt's default style
    font = xlwt.Font()  # font object that will be attached to the style
    font.name = name
    font.bold = bold
    # xlwt spells this attribute "colour_index"; "color_index" is silently
    # stored as an unrelated attribute and never reaches the workbook.
    font.colour_index = 4
    font.height = height
    # Attach the font; without this the returned style keeps its default font
    # and every argument of this function is ignored.
    style.font = font
    return style

def _write_poi_row(sheet, row, poi):
    """Write the fields of one <poi> element into worksheet row ``row``.

    Child tags whose name appears in ``saveField`` are written to the column
    at that name's index. <photos> is flattened into a ';'-terminated list of
    URLs, and <location> is converted with
    ``coordinate_conversion.gcj02towgs84`` and split into the 'x' and 'y'
    columns.
    """
    for tag in poi:
        name = tag.name  # tag name
        print(name)
        if name == "photos":  # pictures
            # Collect every <url> below <photos>. Reading ``tag.url`` inside
            # a loop would return the first URL on every iteration.
            value = "".join(
                url.get_text() + ";" for url in tag.findAll("url")
            )
            sheet.write(row, saveField.index("photos"), value)
            continue
        if name in saveField:
            sheet.write(row, saveField.index(name), tag.get_text())
        if name == "location":
            # The location text is "x,y"; convert it before saving.
            x, y = tag.get_text().split(',')
            x, y = coordinate_conversion.gcj02towgs84(float(x), float(y))
            sheet.write(row, saveField.index('x'), x)
            sheet.write(row, saveField.index('y'), y)


if __name__ == '__main__':
    # Create the workbook and a sheet with one header row.
    w = xlwt.Workbook(encoding="utf-8")
    sheet = w.add_sheet(TYPE)
    for col, field in enumerate(saveField):
        sheet.write(0, col, field)

    cur = 1  # next worksheet row to fill (row 0 holds the header)
    try:
        # MAX_PAGE is the maximum page number, so include it in the range.
        for page in range(MIN_PAGE, MAX_PAGE + 1):
            urlParamJson["page"] = page
            print("当前 %s 页..." % page)
            params = urllib.urlencode(urlParamJson)
            url = "http://restapi.amap.com/v3/place/text?%s" % params
            http = urllib.urlopen(url)
            try:
                dom = BeautifulSoup(http)
            finally:
                http.close()  # release the connection once parsed
            poiList = dom.findAll("poi")
            if not poiList:  # no more results
                break
            for poi in poiList:
                _write_poi_row(sheet, cur, poi)
                cur = cur + 1
                # ``cur`` is the next free row, so cur - 1 POIs are saved.
                saved = cur - 1
                if saved % 10 == 0:
                    print("当前已保存 %s 个信息" % saved)
    finally:
        # Save whatever was collected even if a request or parse step
        # failed part-way through the crawl.
        w.save(outPath)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

geodoer

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值