pyspider递归调用

代码如下

    def predeal_page(self, response):
        """Build the paged report query form and schedule the next crawl step.

        Paging state is read from ``response.save``:

        * ``isFirst`` -- ``0`` on the initial call, ``1`` on follow-up calls.
        * ``num``     -- counter of the current step; compared against the
          number of pages derived from the record count.
        * ``count``   -- total record count, only read when ``isFirst`` is
          not ``0`` (on the initial call the count is parsed from the
          ``#form:j_id260`` field of the response document instead).

        While ``num`` is below the computed page count, two POST requests are
        scheduled with the same form data: one handled by ``self.detail_page``
        and one handled by ``self.predeal_page`` again, carrying ``num + 1``
        and the record count in ``save``.

        Args:
            response: the response object passed to the callback; this method
                uses its ``save``, ``doc`` and ``cookies`` attributes.
        """
        page_size = 1000
        is_first = response.save['isFirst']
        current_num = response.save['num']

        # The total record count is only parsed from the page on the initial
        # call; afterwards it is carried along in ``save``. A raw string is
        # used because "\:" is not a valid escape sequence in a normal string.
        if is_first == 0:
            page_count = int(response.doc("html").find(r"#form\:j_id260").val())
        else:
            page_count = response.save['count']
        print("pagecount="+str(page_count))

        # Sample the clock once so both dates derive from the same instant.
        now = time.time()
        seconds_per_day = 24 * 60 * 60
        start_time = time.strftime('%Y%m%d', time.localtime(now - 2 * seconds_per_day))
        end_time = time.strftime('%Y%m%d', time.localtime(now - seconds_per_day))
        print("共搜索" + str(page_count) + "条记录")
        actionUrl = "http://xxx.com/pages/erpt/rpt_main_report.jsf"

        dic = {
            "P_AUTHTAG": {"index": 1, "value": "1111", "name": "1111", "label": "", "displayCondition": ""},
            "P_DATE_TYPE": {"index": 2, "value": "1", "name": "订单时间", "label": "日期类型", "displayCondition": "true"},
            "P_START_DATE": {"index": 3, "value": start_time, "name": start_time, "label": "订单开始日期", "displayCondition": "true"},
            "P_END_DATE": {"index": 4, "value": end_time, "name": end_time, "label": "订单结束日期", "displayCondition": "true"},
        }

        # Query conditions are sent as base64 of the space-stripped JSON text.
        condition_json = json.dumps(dic, ensure_ascii=False).replace(' ', '')
        formj_id23 = base64.b64encode(condition_json.encode("utf-8")).decode("utf-8")
        print(formj_id23)

        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "Origin": "http://report.mall.10010.com",
        }
        data = {
            "AJAXREQUEST": "_viewRoot",
            "_authKey": "",
            "form:j_id23": formj_id23,
            "rpt_id": "TRADE_004",
            "form:j_id26": "TRADE_004",
            "form:j_id27": "/mallrpt/pages/trade_004.xhtml",
            "form:j_id258": str(page_size),
            "form:j_id259": "0",
            "form:j_id260": "0",
            "form:P_BROWSER_TYPE": "Chrome",
            "form:P_BROWSER_VERSION": "43.0.2357.130",
            "form:exportFormat": "",
            "form:motionFlag": "query",
            "form_SUBMIT": "1",
            "javax.faces.ViewState": "rO0ABXVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAN0AAVqX2lkMXB0ACEv cGFnZXMvZXJwdC9ycHRfbWFpbl9yZXBvcnQueGh0bWw=",
        }

        # Distinguish the initial call from follow-up ("next page") calls.
        if is_first == 0:
            data["form:j_id28"] = "form:j_id28"
        elif is_first == 1:
            data["form:j_id259"] = str(page_size * (current_num - 1))
            data["param1"] = "next"
            data["form:j_id255"] = "form:j_id255"
            data["form:j_id260"] = str(page_count)

        # Recursive scheduling: one detail request for this step, then this
        # same callback again with the counter advanced by one.
        times = int((page_count - 1) / page_size) + 1
        print("times="+str(times))
        if current_num < times:
            self.crawl(actionUrl+"?id="+str(current_num), data=data, cookies=response.cookies,
                       callback=self.detail_page, headers=headers, method="POST", priority=1)
            next_num = current_num + 1
            next_url = actionUrl+"?id2="+str(next_num)
            self.crawl(next_url, data=data, cookies=response.cookies,
                       callback=self.predeal_page, headers=headers, method="POST",
                       save={'isFirst': 1, 'num': next_num, 'count': page_count})
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值