import requests
import json
import time
import pandas as pd
import hashlib
import random
from django.shortcuts import render,HttpResponse
from . import models
# Module-level crawler state shared by the functions below.
sl = set() # MD5 digests of request URLs already seen (used for URL de-duplication)
count = 0 # key counter toggled by getKeys()
req_count = 0 # number of requests, reported in getjson()'s log line
# API keys sent as the 'key' query parameter of AMap requests.
# NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
keys = ['227b360433411dd173a81ab636a89543','6d286676e227411e91e045d97994bfa3','8325164e247e15eea68b59e89200988b']
def getTypes():
    """Load the POI type codes from the local AMap POI-code workbook.

    Returns:
        A list with one string per row of the sheet's second column, each
        left-padded with '0' to a width of six characters.
    """
    sheet = pd.read_excel(
        r'D:\source\gaode\gaodeMap\gaodeSpider\amap_poicode.xlsx',
        sheet_name='POI分类与编码(中英文)',
    )
    # Column index 1 holds the codes; pad so every code is six characters wide.
    codes = sheet.iloc[:, 1]
    return [str(code).rjust(6, '0') for code in codes]
def getHeaders():
    """Build HTTP request headers with a randomly chosen User-Agent.

    Returns:
        A dict with the single key "User-Agent" whose value is picked at
        random from the pool below.
    """
    # One entry appears twice in the pool, so it is twice as likely to be picked.
    user_agent_pool = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
        "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
        "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
    )
    return {"User-Agent": random.choice(user_agent_pool)}
def getKeys():
    """Return an API key, alternating between the first two entries of ``keys``.

    The module-level ``count`` acts as a toggle: when it is 0 it is set to 1
    and ``keys[0]`` is returned; otherwise it is reset to 0 and ``keys[1]``
    is returned.
    """
    # NOTE(review): keys[2] is never returned by this toggle; confirm whether
    # the third key is meant to take part in the rotation.
    global count
    use_first = count == 0
    count = count + 1 if use_first else 0
    return keys[0] if use_first else keys[1]
def getjson(page,types):
    """Fetch one page of AMap place-text search results for a POI type code.

    Args:
        page: Page number, sent as the ``page`` query parameter.
        types: POI type code, sent as the ``types`` query parameter.

    Returns:
        The decoded JSON body, or ``False`` when the MD5 digest of the final
        request URL is already recorded in the module-level set ``sl``.

    Raises:
        requests.RequestException: If the request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    global req_count
    query = {
        'key': getKeys(),
        'keywords': '',
        'types': types,
        'city': 610113,
        'citylimit': 'true',
        'offset': 20,
        'extensions': 'all',
        'children': 1,
        'page': page,
    }
    # Count the request so the log line below shows a running total
    # (the counter was previously printed but never advanced).
    req_count += 1
    # A timeout keeps one stalled connection from hanging the whole crawl.
    r = requests.get(
        'https://restapi.amap.com/v3/place/text?',
        params=query,
        headers=getHeaders(),
        timeout=10,
    )
    print('第{}次请求,请求url是{}'.format(req_count,r.url))
    # URL de-duplication on the MD5 digest of the final URL.
    # NOTE(review): the API key is part of the URL, so the same page/types
    # fetched with a different key is not recognised as a duplicate.
    url_digest = hashlib.md5(r.url.encode(encoding='utf-8')).hexdigest()
    if url_digest in sl:
        return False
    sl.add(url_digest)
    return json.loads(r.text)
# POI keys copied verbatim into the ``data`` payload of each saved row;
# a key missing from the POI record is stored as None.
_POI_DATA_FIELDS = (
    'distance',
    'pcode',
    'importance',
    'recommend',
    'type',
    'photos',
    'discount_num',
    'gridcode',
    'typecode',
    'shopinfo',
    'poiweight',
    'groupbuy_num',
    'navi_poiid',
    'shopid',
    'location',
    'exit_location',
    'childtype',
    'indoor_data',
    'business_area',
    'match',
    'postcode',
    'cityname',
    'biz_type',
    'pname',
    'biz_ext',
    'website',
    'timestamp',
    'email',
    'indoor_map',
    'entr_location',
    'event',
    'tag',
    'alias',
    'children',
    'citycode',
)


def gaodeSpider(request):
    """Django view that crawls POIs for every type code and stores them.

    For each type code returned by ``getTypes()``, pages through
    ``getjson()`` results starting at page 1. Paging for a type stops when
    ``getjson()`` returns a falsy value (it reports an already-seen URL as
    ``False``) or when the response's ``count`` is the string ``'0'``.
    Every entry of the response's ``pois`` list is saved as one
    ``models.gaodeSpider`` row.

    Args:
        request: The incoming request object; it is not read.

    Returns:
        An empty ``HttpResponse`` once all type codes have been processed.
    """
    for types in getTypes():
        page = 1
        while True:
            decodejson = getjson(page, types)
            if not decodejson or decodejson['count'] == '0':
                break
            # Assumes every entry of 'pois' is a JSON object (dict) — TODO confirm.
            for poi in decodejson['pois']:
                data = {field: poi.get(field) for field in _POI_DATA_FIELDS}
                models.gaodeSpider(
                    _id=poi.get('id'),
                    parentId=poi.get('parent'),
                    location=poi.get('location'),
                    name=poi.get('name'),
                    address=poi.get('address'),
                    tel=poi.get('tel'),
                    typecode=poi.get('typecode'),
                    # Read the POI's own 'adcode'; this was previously filled
                    # from 'typecode' by mistake.
                    adcode=poi.get('adcode'),
                    data=data,
                    # NOTE(review): this is the module-level key counter used
                    # by getKeys(), not the response's 'count' — confirm intent.
                    count=count,
                ).save()
            # Pause before requesting the next page.
            time.sleep(0.2)
            page += 1
    return HttpResponse()
# NOTE: web-page residue (a "Were the related recommendations helpful?" rating widget) was pasted here; it is not part of the program.