爬虫 访问 重试

不知道为什么,以前一直正常,现在访问链接时经常报错,所以加上了出错重试的逻辑。

# coding:utf-8
import requests
import http.cookiejar
import re
import sys
import smtplib
import time
from email.mime.text import MIMEText


# Retry budget that the polling loop passes to Urlrequest.get().
retries1 = 30
# Recipient address used for every notification e-mail.
_to = "xxxxxxx@360.cn"

class SendQqMail:
    """Send the current feedback entry by e-mail through QQ Mail (SMTP over SSL)."""

    def getqqmail(self, retries):
        """Mail the module-level ``listitem1`` text to ``_to`` via smtp.qq.com.

        The text is used as the message body and, with runs of whitespace
        collapsed to single spaces, as the subject line.

        Args:
            retries: Number of extra attempts allowed after the first one
                fails; values below zero are treated as zero.

        Returns:
            True once the message has been handed to the server.

        Raises:
            smtplib.SMTPException, IOError, OSError: The error of the last
                attempt, re-raised when every attempt failed so that a caller
                wrapping this call in ``try``/``except`` can switch to
                another mailer.
        """
        # NOTE(review): the account credentials are hard-coded; consider
        # loading them from configuration instead of the source file.
        _user = "xxxxxxxx@qq.com"
        _pwd = "xxxxxxxxxxxxxxxxx"

        msg = MIMEText(listitem1)
        # The scraped cell may span several lines; keep the header on one line.
        msg["Subject"] = " ".join(listitem1.split())
        msg["From"] = _user
        msg["To"] = _to

        attempts_left = max(retries, 0)
        while True:
            try:
                s = smtplib.SMTP_SSL("smtp.qq.com", 465)
                try:
                    s.login(_user, _pwd)
                    s.sendmail(_user, _to, msg.as_string())
                    s.quit()
                finally:
                    # Release the socket even when login/sendmail/quit failed.
                    s.close()
            except (smtplib.SMTPException, IOError, OSError) as e:
                # IOError/OSError cover connection-level failures (refused,
                # DNS, timeout, SSL) in addition to SMTP protocol errors.
                if attempts_left > 0:
                    print("retry.QQ mail...............%s" % (e,))
                    attempts_left -= 1
                    continue
                print("Send QQ Email Falied,%s" % (e,))
                raise
            print("Send QQ Email Success!")
            return True

class Send163Mail:
    """Send the current feedback entry by e-mail through 163 Mail (SMTP over SSL)."""

    def get163mail(self, retries):
        """Mail the module-level ``listitem1`` text to ``_to`` via smtp.163.com.

        The text is used as the message body and, with runs of whitespace
        collapsed to single spaces, as the subject line.

        Args:
            retries: Number of extra attempts allowed after the first one
                fails; values below zero are treated as zero.

        Returns:
            True once the message has been handed to the server, False when
            every attempt failed. Failure is reported through the return
            value rather than an exception so that a caller invoking this
            mailer as a last resort, without a handler of its own, keeps
            running.
        """
        # NOTE(review): the account credentials are hard-coded; consider
        # loading them from configuration instead of the source file.
        _user = "xxxxxxxxxxxxxx@163.com"
        _pwd = "xxxxxxxxxxxxxxxxxxxxx"

        msg = MIMEText(listitem1)
        # The scraped cell may span several lines; keep the header on one line.
        msg["Subject"] = " ".join(listitem1.split())
        msg["From"] = _user
        msg["To"] = _to

        attempts_left = max(retries, 0)
        while True:
            try:
                s = smtplib.SMTP_SSL("smtp.163.com", 465)
                try:
                    s.login(_user, _pwd)
                    s.sendmail(_user, _to, msg.as_string())
                    s.quit()
                finally:
                    # Release the socket even when login/sendmail/quit failed.
                    s.close()
            except (smtplib.SMTPException, IOError, OSError) as e:
                # IOError/OSError cover connection-level failures (refused,
                # DNS, timeout, SSL) in addition to SMTP protocol errors.
                if attempts_left > 0:
                    print("retry.163mail...............%s" % (e,))
                    attempts_left -= 1
                    continue
                print("Send 163 Email Falied,%s" % (e,))
                return False
            print("Send 163 Email Success!")
            return True



class Urlrequest:
    """Fetch the feedback page and extract its table cells, retrying on errors."""

    # Captures everything between the style attribute of the opening <td>
    # and the matching </td>, with surrounding whitespace trimmed.
    _CELL_PATTERN = re.compile(r'<td style="overflow:hidden;word-break:break-all;word-wrap:break-word;"\s*([\S\s]*?)\s*</td>')

    # Seconds to wait for the server before an attempt counts as failed;
    # without a timeout a stalled connection would block the caller forever.
    _TIMEOUT = 30

    def get(self, retries):
        """Download the page with the module-level ``session`` and ``headers``.

        Args:
            retries: Number of extra attempts allowed after the first one
                fails; values below zero are treated as zero.

        Returns:
            A list with one captured string per matching ``<td>`` cell. The
            list is empty when the page has no such cell or when every
            attempt failed, so the result can always be iterated.
        """
        purposurl1 = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
        attempts_left = max(retries, 0)
        while True:
            try:
                resp1 = session.get(purposurl1, headers=headers,
                                    allow_redirects=False,
                                    timeout=self._TIMEOUT)
                buffer1 = str(resp1.text)
                return self._CELL_PATTERN.findall(buffer1)
            except Exception as what:
                # Broad on purpose: this is the retry boundary, and any
                # failure (network, decoding) is handled the same way.
                if attempts_left <= 0:
                    print('visit url Failed, %s' % (what,))
                    return []
                attempts_left -= 1


# Browser-like User-Agent passed with the requests made by this script.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:48.0) Gecko/20100101 Firefox/48.0'}
# File used to persist the session cookies.
filename = 'cookie'

# A session ties the requests of one user together and handles cookies
# automatically until the session ends.
session = requests.Session()
# LWPCookieJar can store cookies in a file using the Set-Cookie3 format.
session.cookies = http.cookiejar.LWPCookieJar(filename)
# Best effort: reuse cookies saved by a previous run. A missing or unreadable
# cookie file is not fatal (the login request further down is sent either way).
try:
    session.cookies.load(filename=filename, ignore_discard=True)
    print('load local cookies successed')
except Exception:
    # Not a bare ``except``: Ctrl-C and SystemExit must still propagate.
    print('Cookies load failed!')

print('attemptting to login...')
url = 'https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
# NOTE(review): the account name and password are hard-coded in the source;
# they should be moved to configuration or the environment and rotated.
data = {'user': 'zhangkun-s',
        'passwd': 'zk584807419360!!!',
        'ref': 'http:xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
        'tag': '',
        'src': 'qihoo'}
# The timeout makes a stalled login fail with an exception instead of
# blocking the script forever.
result = session.post(url, data=data, headers=headers, timeout=30)
# Persist the cookies to the local cookie file.
session.cookies.save(ignore_discard=True, ignore_expires=True)

# Python 2 only: make UTF-8 the implicit str/unicode conversion codec.
# ``reload`` and ``sys.setdefaultencoding`` do not exist on Python 3, where
# text is already unicode, so the hack is skipped there.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding("utf-8")

get_url = 'http://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
# allow_redirects=False: do not follow redirects.
# timeout: fail with an exception instead of hanging on a stalled connection.
resp = session.get(get_url, headers=headers, allow_redirects=False, timeout=30)


purposurl = 'http://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
resp = session.get(purposurl, headers=headers, allow_redirects=False, timeout=30)
buffer = str(resp.text)
# Captures everything between the style attribute of the opening <td> and the
# matching </td>, with surrounding whitespace trimmed.
getarticlelist = re.compile(r'<td style="overflow:hidden;word-break:break-all;word-wrap:break-word;"\s*([\S\s]*?)\s*</td>')
# Cells present at start-up form the baseline; the polling loop only reports
# cells that are not in this list.
pagemsg = re.findall(getarticlelist, buffer)
print('system boot successed')
# Entries containing any of these tags are remembered but never mailed.
_MUTED_TAGS = (
    '[fix_error]', '[init_error]', '[exit_unexpect]', '[android]',
    '[FIX_ERROR]', '[INIT_ERROR]', '[EXIT_UNEXPECT]', '[ANDROID]',
)

# Poll the feedback page once a minute and mail every entry not seen before.
while True:
    time.sleep(60)
    buff = Urlrequest()
    pagemsg1 = buff.get(retries1)
    # NOTE: the loop variable must remain the module-level name ``listitem1``;
    # the mailer classes read it as a global to build the message.
    for listitem1 in pagemsg1 or []:
        if listitem1 in pagemsg:
            continue
        # Remember the entry first so it is reported at most once.
        pagemsg.append(listitem1)
        if any(tag in listitem1 for tag in _MUTED_TAGS):
            continue

        try:
            print('find a new feedback')
            qqsendmailer = SendQqMail()
            time.sleep(5)
            qqsendmailer.getqqmail(10)
        except Exception:
            print("QQ mail try five times fail,change 163mail")
            try:
                neteasysendmailer = Send163Mail()
                time.sleep(5)
                neteasysendmailer.get163mail(10)
            except Exception as err:
                # Losing one notification must not stop the monitor.
                print("163 mail failed too,%s" % (err,))
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值