爬取的数据仅用于科研
高德地图POI服务:http://lbs.amap.com/api/webservice/guide/api/search
结果:爬取厦门的公司企业信息
高德地图POI服务具体提供哪些数据字段,可查看上面的官方文档
coordinate_conversion文件:https://blog.csdn.net/summer_dew/article/details/80723434
# -*- coding:utf-8 -*-
# function: 爬取高德POI服务数据
import xlwt
import urllib
from bs4 import BeautifulSoup
import coordinate_conversion
# POI category to query; it is also used as the sheet name and output file name.
TYPE = u"公司企业"
outPath = r"D:\Users\PasserQi\Desktop\GetAMapPOI\%s.xls" % TYPE

# Spreadsheet columns in output order. A POI child tag whose name appears here
# is written to the matching column; "x"/"y" are filled from the converted
# "location" tag and "photos" from the collected photo URLs.
saveField = [
    "id", "name", "type", "typecode", "address",
    "x", "y",
    "pname", "cityname", "business_area", "photos",
]

# AMap (Gaode) Web Service API key
AMAP_API_KEY = "4fac3db866dcc3b8a******d3a7db1c7"

# Query-string parameters shared by every request; the script adds "page"
# to this dict before each call.
urlParamJson = dict(
    city='厦门',
    output='xml',
    key=AMAP_API_KEY,
    types=TYPE.encode("utf8"),
    citylimit='true',  # only return data inside the specified city
    offset='20',  # number of records per page
)

# Page bounds for the crawl
MIN_PAGE = 1
MAX_PAGE = 100  # maximum page number
def setStype(name, height, bold=False):
    """Build an xlwt cell style that uses the requested font.

    Args:
        name: Font name, assigned to ``xlwt.Font.name``.
        height: Font height, assigned to ``xlwt.Font.height`` unchanged
            (xlwt documents this unit as 1/20 of a point -- confirm before
            passing point sizes).
        bold: Whether the font is bold. Defaults to False.

    Returns:
        An ``xlwt.XFStyle`` whose ``font`` is the configured font.
    """
    style = xlwt.XFStyle()

    font = xlwt.Font()
    font.name = name
    font.bold = bold
    font.color_index = 4  # fixed palette colour index
    font.height = height

    # Attach the font to the style; without this assignment the returned
    # style keeps xlwt's default font and every argument is ignored.
    style.font = font
    return style
def _photo_urls(photos_tag):
    """Return the text of every <url> element under *photos_tag*, each followed by ';'.

    Each photo's own URL is collected. Reading ``photos_tag.url`` inside a
    loop would only ever yield the first <url> element, repeated.
    An element with no <url> descendants gives an empty string.
    """
    return "".join(url.get_text() + ";" for url in photos_tag.findAll("url"))


def _write_poi(sheet, row, poi):
    """Write the fields of one <poi> element listed in saveField into *row* of *sheet*.

    Args:
        sheet: xlwt worksheet to write into.
        row: Zero-based row index for this POI.
        poi: BeautifulSoup element for a single <poi>; its direct children
            are the per-field tags.
    """
    for tag in poi:
        name = getattr(tag, "name", None)  # tag name
        if name is None:
            # Text nodes between the field tags carry no tag name.
            continue

        if name == "photos":  # pictures
            sheet.write(row, saveField.index("photos"), _photo_urls(tag))
            continue

        if name in saveField:
            sheet.write(row, saveField.index(name), tag.get_text())

        if name == "location":
            # "location" is a "<x>,<y>" pair; convert it with gcj02towgs84
            # and store the result in the separate x / y columns.
            raw_x, raw_y = tag.get_text().split(',')
            x, y = coordinate_conversion.gcj02towgs84(float(raw_x), float(raw_y))
            sheet.write(row, saveField.index('x'), x)
            sheet.write(row, saveField.index('y'), y)


# NOTE: this script targets Python 2 (urllib.urlencode / urllib.urlopen).
# The parenthesised single-argument print calls behave the same there.
if __name__ == '__main__':
    # Create the workbook and a sheet named after the POI category.
    w = xlwt.Workbook(encoding="utf-8")
    sheet = w.add_sheet(TYPE)

    # Header row.
    for col, field in enumerate(saveField):
        sheet.write(0, col, field)

    # Next free row. Row 0 is the header, so after writing row `cur`
    # exactly `cur` records have been saved.
    cur = 1
    try:
        # MAX_PAGE is the last page to request, so the range must include it.
        for page in range(MIN_PAGE, MAX_PAGE + 1):
            urlParamJson["page"] = page
            print("当前 %s 页..." % page)

            params = urllib.urlencode(urlParamJson)
            url = "http://restapi.amap.com/v3/place/text?%s" % params
            http = urllib.urlopen(url)
            try:
                # NOTE(review): no parser is named, so BeautifulSoup picks
                # whichever one is installed -- consider pinning it.
                dom = BeautifulSoup(http)
            finally:
                # Always release the connection, even if parsing fails.
                http.close()

            poiList = dom.findAll("poi")
            if not poiList:  # no more results
                break

            for poi in poiList:
                _write_poi(sheet, cur, poi)
                # Report the real number of saved records, every 10 records.
                if cur % 10 == 0:
                    print("当前已保存 %s 个信息" % cur)
                cur += 1
    finally:
        # Save whatever was collected, so a failure part-way through the
        # crawl does not discard the pages already fetched.
        w.save(outPath)