工具类一:
构造请求头中的 User-Agent,模拟不同的浏览器访问网站。
class FakeChromeUA(object):
    """Build Chrome-style ``User-Agent`` strings.

    The Chrome version components below are class attributes, so they are
    drawn once, when the class body is executed; only the operating-system
    token is re-chosen on every ``get_useragent`` call.
    """

    # Randomised pieces of the "Chrome/<major>.0.<build>.<patch>" token.
    first_num = random.randint(55, 62)
    third_num = random.randint(0, 3200)
    fourth_num = random.randint(0, 140)

    # Operating-system tokens that one is picked from per call.
    os_type = [
        '(Windows NT 6.1; WOW64)',
        '(Windows NT 10.0; WOW64)',
        '(X11; Linux x86_64)',
        '(Macintosh; Intel Mac OS X 10_9_0)',
    ]

    chrome_version = f'Chrome/{first_num}.0.{third_num}.{fourth_num}'

    def get_useragent(self) -> str:
        """Return a User-Agent string with a randomly chosen OS token.

        :return: the space-joined User-Agent, e.g.
            ``Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36
            (KHTML, like Gecko) Chrome/<version> Safari/537.36``.
        """
        return ' '.join([
            'Mozilla/5.0',
            random.choice(self.os_type),
            'AppleWebKit/537.36',
            '(KHTML, like Gecko)',
            self.chrome_version,
            'Safari/537.36',
        ])
class Spiders(FakeChromeUA):
    """HTTP GET helper that sends a generated Chrome User-Agent."""

    def fetch(self, url, params=None, headers=None, cookies=None, timeout=None):
        """GET ``url`` with a generated ``user-agent`` header after a short pause.

        :param url: address to request.
        :param params: optional query parameters forwarded to ``requests.get``.
        :param headers: optional extra request headers. The mapping is
            copied, never modified; its ``user-agent`` entry is replaced by
            the generated value.
        :param cookies: optional cookies forwarded to ``requests.get``.
        :param timeout: optional timeout forwarded to ``requests.get``.
            ``None`` (the default) sets no timeout, as before.
        :return: the response, with its encoding forced to ``utf-8``, when
            the status code is 200; ``None`` for any other status code and
            when the request fails with a connection error or a timeout.
        """
        import copy

        # Work on a copy so the caller's mapping is not silently changed.
        request_headers = copy.copy(headers) if headers else {}
        request_headers['user-agent'] = self.get_useragent()

        self.wait_some_time()

        # Only the network call can raise the errors handled here.
        try:
            response = requests.get(
                url,
                params=params,
                headers=request_headers,
                cookies=cookies,
                timeout=timeout,
            )
        except (requests.ConnectionError, requests.Timeout):
            return None

        if response.status_code != 200:
            return None

        response.encoding = 'utf-8'
        return response

    def wait_some_time(self):
        """Sleep for a random 100-300 ms to space out consecutive requests."""
        time.sleep(random.randint(100, 300) / 1000)
工具类二:
下载资源时显示进度条。
class util(object):
    """Download helper that reports progress on the console."""

    def progress_bar(self, resp: requests.models.Response, filename: str) -> None:
        """Write the body of ``resp`` to ``filename`` while showing a tqdm bar.

        :param resp: response whose body is read in 1024-byte chunks.
            NOTE(review): for large downloads the request was presumably
            made with ``stream=True`` -- confirm against the caller.
        :param filename: path of the local file; it is opened in ``'wb'``
            mode, so an existing file is overwritten.
        :return: None.
        """
        # A missing Content-Length header yields 0; pass None in that case
        # so tqdm shows plain counters instead of a bar against a zero total.
        total_size = int(resp.headers.get('content-length', 0))

        # Both the file and the bar are context-managed so that each is
        # closed even if the download fails part-way through.
        with open(filename, 'wb') as f, tqdm(
            total=total_size or None, unit='B', unit_scale=True
        ) as pro_bar:
            for size_data in resp.iter_content(chunk_size=1024):
                if not size_data:
                    # Nothing to write or count for an empty chunk.
                    continue
                f.write(size_data)
                pro_bar.update(len(size_data))