出现 "Max retries exceeded with url" 错误:主要原因是打开的连接太多且没有关闭。
# requests.Session has no `keep_alive` attribute that it consults, so assigning
# `sess.keep_alive = False` does not stop connections from being reused.
# Sending "Connection: close" with every request asks the server to close the
# socket after each response, which is what actually disables keep-alive.
sess = requests.session()
sess.headers['Connection'] = 'close'
也可以自定义连接池大小(POOLSIZE)和重试次数:
# Intended to apply globally: rebinds the module-level defaults in
# requests.adapters (retry count and connection-pool size).
# NOTE(review): these constants appear to be consumed as default argument
# values of HTTPAdapter.__init__, which Python binds when the function is
# defined — confirm that reassigning them after import actually affects
# adapters created later; otherwise prefer the per-session configuration.
requests.adapters.DEFAULT_RETRIES = 2
requests.adapters.DEFAULT_POOLSIZE = 100
# Per-session configuration: build one adapter with explicit pool limits and
# mount it for both plain-HTTP and HTTPS URLs of this session only.
s = requests.Session()
adapter = requests.adapters.HTTPAdapter(
    pool_connections=100,  # first positional argument of HTTPAdapter
    pool_maxsize=100,      # second positional argument of HTTPAdapter
)
s.mount('http://', adapter)
s.mount('https://', adapter)
爬取代理并对代理进行过滤:
import gevent.monkey; gevent.monkey.patch_all()
import sys,threading,requests,re,random
from lxml import etree
import random
import gevent
import time
import requests.adapters
import requests.sessions
USER_AGENT_LIST = [
'MSIE (MSIE 6.0; X11; Linux; i686) Opera 7.23',
'Opera/9.20 (Macintosh; Intel Mac OS X; U; en)'