# Code
import requests
from lxml import etree
from concurrent.futures import ThreadPoolExecutor
class Crawl_douban(object):
# Initializer: sets up the instance state shared by the crawl methods.
def __init__(self):
    """Initialize the request headers and the list of target-page URLs."""
    # Spoof a desktop Chrome User-Agent so the site serves normal pages
    # instead of rejecting a bare python-requests client.
    self.header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3902.4 Safari/537.36'}
    # Holds the target-page URLs; populated by page_url().
    self.url = []
# Store the constructed target-page URLs in the list that records the crawl targets.
def page_url(self):
for i in range(0,