Python 爬虫异常状态以及代理的封装

import requests
import time

class StateCodeError(Exception):
    """Signals that a response came back with an abnormal HTTP status code."""

class ContentError(Exception):
    """Signals that a response body contains abnormal (unexpected) content."""


class RequestHelper:
    """Retrying wrapper around ``requests.get`` / ``requests.post``.

    Each call is attempted up to ``max_retries`` times. Proxy, timeout and
    connection errors are reported on stdout and retried; any other exception
    propagates to the caller. Status codes and page content are not checked
    by this class.
    """

    # Maximum number of attempts per get()/post() call.
    max_retries = 10

    def get(self, url, params=None, headers=None, timeout=None):
        """Send a GET request, retrying on connection-level failures.

        Args:
            url: Target URL.
            params: Optional query parameters forwarded to ``requests.get``.
            headers: Optional request headers.
            timeout: Optional timeout forwarded to ``requests``; the default
                ``None`` keeps the previous behaviour of passing no timeout.

        Returns:
            The response of the first attempt that completes, or ``None``
            when every attempt failed.
        """
        return self._send(
            requests.get, url, params=params, headers=headers, timeout=timeout
        )

    def post(self, url, data=None, json=None, headers=None, timeout=None):
        """Send a POST request, retrying on connection-level failures.

        Args:
            url: Target URL.
            data: Optional form/body payload forwarded to ``requests.post``.
            json: Optional JSON payload forwarded to ``requests.post``.
            headers: Optional request headers.
            timeout: Optional timeout forwarded to ``requests``; the default
                ``None`` keeps the previous behaviour of passing no timeout.

        Returns:
            The response of the first attempt that completes, or ``None``
            when every attempt failed.
        """
        return self._send(
            requests.post, url, data=data, json=json, headers=headers,
            timeout=timeout,
        )

    def _send(self, sender, url, **kwargs):
        """Call ``sender(url=url, **kwargs)`` until it succeeds or retries run out."""
        for _ in range(self.max_retries):
            try:
                return sender(url=url, **kwargs)
            # ProxyError and ConnectTimeout both derive from ConnectionError,
            # so the specific handlers must come before the generic one.
            except requests.exceptions.ProxyError:
                print("代理失效过期,重新获取")
            except requests.exceptions.Timeout:
                print("连接超时")
            except requests.exceptions.ConnectionError:
                print("连接错误")
        return None

class ProxyHelper:
    """Retrying wrapper around ``requests`` that sends traffic through a proxy.

    A proxy address is fetched from the provider API on construction and
    replaced whenever a request fails in a way that suggests the proxy is
    dead or blocked. Responses are rejected (and retried with a new proxy)
    when their status code is listed in ``error_code`` or their body contains
    ``blocked_marker``.
    """

    # Placeholders: replace with the proxy provider's API URL and credentials.
    proxy_api_url = "生成代理ip链接"
    proxy_user = "代理账号"
    proxy_password = "代理密码"

    # Title tag of the verification page that marks a response as blocked.
    blocked_marker = "<title>安全验证</title>"

    # Maximum number of attempts per get()/post() call.
    max_retries = 10

    def __init__(self):
        # Status codes treated as a rejected request.
        self.error_code = [403, 418, 429, 454]
        self.proxies = self.get_ip()

    def get_ip(self):
        """Fetch a new proxy address and return a ``requests`` proxies mapping.

        Returns:
            A dict with ``"http"`` and ``"https"`` keys, both pointing at the
            authenticated proxy URL.
        """
        # NOTE(review): presumably throttles calls to the proxy API — confirm.
        time.sleep(5)
        # Strip surrounding whitespace so a trailing newline in the API reply
        # cannot end up inside the proxy URL.
        ip = requests.get(self.proxy_api_url).content.decode().strip()
        proxy_url = f"http://{self.proxy_user}:{self.proxy_password}@{ip}"
        proxies = {
            "http": proxy_url,
            "https": proxy_url,
        }
        print("获取新代理:", ip)
        return proxies

    def get(self, url, params=None, headers=None, timeout=None):
        """Send a GET request through the current proxy, with retries.

        Args:
            url: Target URL.
            params: Optional query parameters forwarded to ``requests.get``.
            headers: Optional request headers.
            timeout: Optional timeout forwarded to ``requests``; the default
                ``None`` keeps the previous behaviour of passing no timeout.

        Returns:
            The first response that passes validation, or ``None`` when all
            attempts failed or were rejected.
        """
        return self._send(
            requests.get, url, params=params, headers=headers, timeout=timeout
        )

    def post(self, url, data=None, json=None, headers=None, timeout=None):
        """Send a POST request through the current proxy, with retries.

        Args:
            url: Target URL.
            data: Optional form/body payload forwarded to ``requests.post``.
            json: Optional JSON payload forwarded to ``requests.post``.
            headers: Optional request headers.
            timeout: Optional timeout forwarded to ``requests``; the default
                ``None`` keeps the previous behaviour of passing no timeout.

        Returns:
            The first response that passes validation, or ``None`` when all
            attempts failed or were rejected.
        """
        return self._send(
            requests.post, url, data=data, json=json, headers=headers,
            timeout=timeout,
        )

    def _check_response(self, response):
        """Raise StateCodeError/ContentError when ``response`` looks blocked."""
        if response.status_code in self.error_code:
            raise StateCodeError("异常状态码")
        # Compare bytes so a body that is not valid UTF-8 cannot raise
        # UnicodeDecodeError during the check.
        if self.blocked_marker.encode() in response.content:
            raise ContentError("内容异常")

    def _send(self, sender, url, **kwargs):
        """Call ``sender`` through the proxy until a valid response arrives.

        Exceptions raised by ``get_ip`` while replacing the proxy are not
        caught here and propagate to the caller.
        """
        for retry_time in range(1, self.max_retries + 1):
            try:
                response = sender(url=url, proxies=self.proxies, **kwargs)
                self._check_response(response)
                return response
            # ProxyError and ConnectTimeout both derive from ConnectionError,
            # so the specific handlers must come before the generic one.
            except requests.exceptions.ProxyError:
                print("代理失效过期,重新获取")
                self.proxies = self.get_ip()
            except requests.exceptions.Timeout:
                print("连接超时")
                # Only swap the proxy on every third attempt.
                if retry_time % 3 == 0:
                    self.proxies = self.get_ip()
            except requests.exceptions.ConnectionError:
                print("连接错误")
                # Only swap the proxy on every third attempt.
                if retry_time % 3 == 0:
                    self.proxies = self.get_ip()
            except StateCodeError:
                print("异常状态码")
                self.proxies = self.get_ip()
            except ContentError:
                print("内容异常")
                self.proxies = self.get_ip()
        # Every attempt failed or was rejected: do not hand back a response
        # that was already judged to be blocked.
        return None


def test():
    """Smoke test: fetch one page through ProxyHelper and print the result."""
    helper = ProxyHelper()
    page = helper.get("https://www.xiachufang.com/explore/")
    # Nothing to print when no usable response came back.
    if not page:
        return
    print(page.status_code)
    print(page.content.decode())

# Run the smoke test only when this file is executed as a script.
if __name__ == "__main__":
    test()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值