import requests
import time


class StateCodeError(Exception):
    """Raised when a response carries a status code listed as blocked."""


class ContentError(Exception):
    """Raised when a response body contains the anti-bot verification page."""


class RequestHelper:
    """Plain ``requests`` wrapper that retries on connection-level failures.

    No proxy is used and neither the status code nor the body is inspected;
    only connection errors trigger a retry.
    """

    # Total number of attempts made before giving up.
    MAX_RETRIES = 10

    def _send_with_retry(self, send, connection_error_message):
        """Call ``send()`` until it succeeds or the attempts run out.

        :param send: zero-argument callable performing the HTTP request.
        :param connection_error_message: text printed for a generic
            ``ConnectionError`` (the public methods use different wording).
        :return: the ``requests.Response`` on success, otherwise ``None``.
        """
        for _ in range(self.MAX_RETRIES):
            try:
                return send()
            except requests.exceptions.ProxyError:
                print("代理失效过期,重新获取")
            # ConnectTimeout is a subclass of ConnectionError, so it has to be
            # listed first or its handler can never run.
            except requests.exceptions.ConnectTimeout:
                print("连接超时")
            except requests.exceptions.ConnectionError:
                print(connection_error_message)
        return None

    def get(self, url, params=None, headers=None):
        """Send a GET request, retrying on connection failures.

        :return: the response, or ``None`` if every attempt failed.
        """
        return self._send_with_retry(
            lambda: requests.get(url=url, params=params, headers=headers),
            "连接错误",
        )

    def post(self, url, data=None, json=None, headers=None):
        """Send a POST request, retrying on connection failures.

        :return: the response, or ``None`` if every attempt failed.
        """
        # The original dropped the result of requests.post() and therefore
        # always returned None; the helper returns the actual response.
        return self._send_with_retry(
            lambda: requests.post(url=url, data=data, json=json, headers=headers),
            "代理连接错误",
        )


class ProxyHelper:
    """``requests`` wrapper that routes traffic through a rotating proxy.

    A new proxy is fetched whenever the current one fails, the target answers
    with a blocked status code, or the anti-bot verification page is served.
    """

    # Placeholders: replace with the real proxy-API link and credentials.
    PROXY_API_URL = "生成代理ip链接"
    PROXY_USERNAME = "代理账号"
    PROXY_PASSWORD = "代理密码"

    # Total number of attempts made before giving up.
    MAX_RETRIES = 10

    # UTF-8 bytes of the page title that marks the verification page.
    # Matching on bytes avoids a UnicodeDecodeError on non-UTF-8 bodies.
    _BLOCK_MARKER = "<title>安全验证</title>".encode()

    def __init__(self):
        self.proxies = self.get_ip()
        # Status codes treated as "this proxy has been blocked".
        self.error_code = [403, 418, 429, 454]

    def get_ip(self):
        """Fetch a fresh proxy address and build a ``requests`` proxies dict.

        :return: mapping with ``"http"`` and ``"https"`` keys, both pointing
            at the same authenticated HTTP proxy URL.
        """
        # NOTE(review): presumably throttles calls to the proxy API - confirm.
        time.sleep(5)
        # Strip surrounding whitespace so a trailing newline in the API
        # response cannot end up inside the proxy URL.
        ip = requests.get(self.PROXY_API_URL).content.decode().strip()
        proxy_url = f"http://{self.PROXY_USERNAME}:{self.PROXY_PASSWORD}@{ip}"
        proxies = {"http": proxy_url, "https": proxy_url}
        print("获取新代理:", ip)
        return proxies

    def _rotate_on_third_attempt(self, attempt):
        """Swap the proxy on every third attempt (3, 6, 9)."""
        if attempt % 3 == 0:
            self.proxies = self.get_ip()

    def _send_with_retry(self, send):
        """Call ``send()`` until an acceptable response arrives.

        :param send: zero-argument callable performing the request; it must
            read ``self.proxies`` at call time so rotations take effect.
        :return: the first acceptable response. When every attempt fails, the
            most recently received (rejected) response, or ``None`` if no
            response was received at all.
        """
        response = None
        for attempt in range(1, self.MAX_RETRIES + 1):
            try:
                response = send()
                # Reject responses whose status code signals a block.
                if response.status_code in self.error_code:
                    raise StateCodeError("异常状态码")
                # Reject the anti-bot verification page.
                if self._BLOCK_MARKER in response.content:
                    raise ContentError("内容异常")
                return response
            except requests.exceptions.ProxyError:
                print("代理失效过期,重新获取")
                self.proxies = self.get_ip()
            # ConnectTimeout is a subclass of ConnectionError, so it has to be
            # listed first or its handler can never run.
            except requests.exceptions.ConnectTimeout:
                print("连接超时")
                self._rotate_on_third_attempt(attempt)
            except requests.exceptions.ConnectionError:
                print("连接错误")
                self._rotate_on_third_attempt(attempt)
            except StateCodeError:
                print("异常状态码")
                self.proxies = self.get_ip()
            except ContentError:
                print("内容异常")
                self.proxies = self.get_ip()
        return response

    def get(self, url, params=None, headers=None):
        """Send a GET request through the current proxy, with retries.

        :return: see ``_send_with_retry``.
        """
        return self._send_with_retry(
            lambda: requests.get(
                url=url, params=params, headers=headers, proxies=self.proxies
            )
        )

    def post(self, url, data=None, json=None, headers=None):
        """Send a POST request through the current proxy, with retries.

        :return: see ``_send_with_retry``.
        """
        return self._send_with_retry(
            lambda: requests.post(
                url=url, data=data, json=json, headers=headers, proxies=self.proxies
            )
        )


def test():
    """Manual smoke test: fetch one page through the proxy and print it."""
    helper = ProxyHelper()
    response = helper.get("https://www.xiachufang.com/explore/")
    # A Response is truthy only when its status code is below 400.
    if response:
        print(response.status_code)
        print(response.content.decode())


if __name__ == '__main__':
    test()
Python爬虫异常状态以及代理的封装
于 2024-09-07 09:42:02 首次发布