spider-gaode-3

import requests
import json
import time
import pandas as pd
import hashlib
import random
from django.shortcuts import render,HttpResponse
from . import models


sl = set() # MD5 hex digests of request URLs already seen; getjson() uses it to skip duplicates
count = 0 # key rotation state read and updated by getKeys(); also passed to the model in gaodeSpider()
req_count = 0 # request counter shown in getjson()'s log line
# AMap web-service API keys sent as the 'key' query parameter.
# NOTE(review): credentials are hard-coded in source - consider loading them from configuration.
keys = ['227b360433411dd173a81ab636a89543','6d286676e227411e91e045d97994bfa3','8325164e247e15eea68b59e89200988b']

def getTypes():
    """Read the POI type codes from the local AMap classification workbook.

    Returns:
        list[str]: the values of the sheet's second column, each converted
        to ``str`` and left-padded with ``'0'`` to a width of six characters.
    """
    workbook_path = r'D:\source\gaode\gaodeMap\gaodeSpider\amap_poicode.xlsx'
    sheet = pd.read_excel(workbook_path, sheet_name='POI分类与编码(中英文)')
    # Column index 1 holds the codes; padding restores leading zeros that are
    # lost when the spreadsheet stores the codes as numbers.
    code_column = sheet.iloc[:, 1]
    return [str(raw_code).rjust(6, '0') for raw_code in code_column]

def getHeaders():
    """Build HTTP request headers with a randomly selected User-Agent.

    Returns:
        dict: a single-entry mapping ``{"User-Agent": <one string from the pool>}``.
    """
    # One entry appears twice in the pool, which makes it twice as likely
    # to be picked; the pool is kept exactly as it was.
    agent_pool = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
        "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
        "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
    )
    chosen_agent = random.choice(agent_pool)
    return {"User-Agent": chosen_agent}

def getKeys():
    """Return the next API key, cycling through every entry of ``keys``.

    The module-level ``count`` holds the index of the key to hand out next
    and is advanced (wrapping around) on each call. Earlier code toggled
    between the first two entries only, so any further configured key was
    never used; for a two-key list the sequence is unchanged.

    Returns:
        str: one element of the module-level ``keys`` list.

    Raises:
        IndexError: if ``keys`` is empty.
    """
    global count
    if not keys:
        raise IndexError('no API keys configured')
    # The modulo also protects against a stale/out-of-range counter value.
    index = count % len(keys)
    count = (index + 1) % len(keys)
    return keys[index]

def getjson(page,types,timeout=10):
    """Request one page of AMap place-text search results for a POI type.

    Args:
        page: value sent as the ``page`` query parameter.
        types: POI type code sent as the ``types`` query parameter.
        timeout: seconds to wait for the server before giving up
            (passed to ``requests.get``); without it a stalled connection
            would block forever.

    Returns:
        The decoded JSON body, or ``False`` when the final request URL has
        already been seen (its MD5 digest is in the module-level ``sl``).

    Raises:
        requests.exceptions.RequestException: on connection errors/timeouts.
        ValueError: if the response body is not valid JSON.
    """
    global req_count
    pa = {
        'key': getKeys(),
        'keywords': '',
        'types':types,
        'city':610113,
        'citylimit':'true',
        'offset': 20,
        'extensions': 'all',
        'children':1,
        'page':page
    }
    # Count the request before logging so the printed number is the real
    # running total (it used to stay at 0 forever).
    req_count += 1
    r = requests.get(
        'https://restapi.amap.com/v3/place/text?',
        params=pa,
        headers=getHeaders(),
        timeout=timeout,
    )
    print('第{}次请求,请求url是{}'.format(req_count,r.url))
    # URL de-duplication: remember the MD5 digest of every URL fetched.
    # NOTE(review): the check runs after the request was sent, and the URL
    # contains the rotating key, so it does not save API quota as written.
    encryp_url = hashlib.md5(r.url.encode(encoding='utf-8')).hexdigest()
    if encryp_url in sl:
        return False
    sl.add(encryp_url)
    return json.loads(r.text)

 
# POI keys copied, in this order, into the ``data`` payload stored on the model.
_POI_DATA_FIELDS = (
    'distance',
    'pcode',
    'importance',
    'recommend',
    'type',
    'photos',
    'discount_num',
    'gridcode',
    'typecode',
    'shopinfo',
    'poiweight',
    'groupbuy_num',
    'navi_poiid',
    'shopid',
    'location',
    'exit_location',
    'childtype',
    'indoor_data',
    'business_area',
    'match',
    'postcode',
    'cityname',
    'biz_type',
    'pname',
    'biz_ext',
    'website',
    'timestamp',
    'email',
    'indoor_map',
    'entr_location',
    'event',
    'tag',
    'alias',
    'children',
    'citycode',
)


def _poi_to_model_kwargs(poi):
    """Translate one POI dict from the API response into model keyword arguments.

    Keys that are absent from ``poi`` become ``None``.
    """
    return {
        '_id': poi.get('id'),
        'parentId': poi.get('parent'),
        'location': poi.get('location'),
        'name': poi.get('name'),
        'address': poi.get('address'),
        'tel': poi.get('tel'),
        'typecode': poi.get('typecode'),
        # Previously read from 'typecode' by mistake (copy-paste slip).
        'adcode': poi.get('adcode'),
        'data': {field: poi.get(field) for field in _POI_DATA_FIELDS},
    }


def gaodeSpider(request):
    """Django view: crawl every POI type page by page and save each POI.

    For each type code from ``getTypes()``, pages are requested through
    ``getjson()`` starting at page 1 until the response is falsy (duplicate
    URL), reports a count of ``'0'``, or carries no POIs. Every POI is
    stored through ``models.gaodeSpider(...).save()``.

    Args:
        request: the incoming Django request (not inspected).

    Returns:
        HttpResponse: an empty response once the crawl has finished.
    """
    for types in getTypes():
        page = 1
        while True:
            decodejson = getjson(page, types)
            if not decodejson:
                break
            pois = decodejson.get('pois') or []
            # A payload without 'count'/'pois' (presumably an API error
            # response - verify against the AMap docs) or an empty page ends
            # this type instead of raising KeyError or paging forever.
            if str(decodejson.get('count', '0')) == '0' or not pois:
                break
            for poi in pois:
                models.gaodeSpider(
                    # Module-level key-rotation counter, passed through as before.
                    # NOTE(review): confirm this is intended rather than the
                    # result count reported by the API.
                    count=count,
                    **_poi_to_model_kwargs(poi)
                ).save()
                # Short pause between saves, kept from the original pacing.
                time.sleep(0.2)
            page += 1
    return HttpResponse()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值