- 线程池使用方法介绍
实例化线程池对象：
```python
with ThreadPoolExecutor(max_workers=线程数目) as pool:
    results = pool.map(要执行的函数, 函数的参数迭代数据)
    for result in results:
        print(result)
```
- 代码实现
import requests
from lxml import etree
import time
from concurrent.futures import ThreadPoolExecutor
class QiuBaiSpider(object):
    """Spider for one qiushibaike text page: fetch, parse usernames, print them."""

    def __init__(self, url):
        """
        :param url: full URL of the page to crawl
        """
        self.url = url
        # Browser-like UA so the site does not reject the request as a bot.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36"
        }

    def send_request(self):
        """
        Send the GET request and return the decoded response body.

        :return: page HTML source as str
        """
        print(f"正在抓取: {self.url}")
        # timeout so a stalled connection cannot hang a pool worker forever
        response = requests.get(self.url, headers=self.headers, timeout=10)
        return response.content.decode()

    def get_content_list(self, html_str):
        """
        Parse the page HTML and extract one item dict per post.

        :param html_str: page HTML source string
        :return: list of dicts, each with a "user_name" key
        """
        html = etree.HTML(html_str)
        div_list = html.xpath("//div[@class='col1 old-style-col1']/div")
        content_list = []
        for div in div_list:
            item = {}
            # Guard against posts with no <h2> (e.g. anonymous users) so a
            # missing node yields "" instead of raising IndexError.
            names = div.xpath(".//h2/text()")
            item["user_name"] = names[0].strip() if names else ""
            content_list.append(item)
        return content_list

    def save_content_list(self, content_list):
        """
        Persist the parsed items; in this tutorial they are just printed.

        :param content_list: list of item dicts to save
        :return: None
        """
        for content in content_list:
            print(content)

    def run(self):
        """Fetch the page, parse it, save the items, and return them."""
        html_str = self.send_request()
        content_list = self.get_content_list(html_str)
        self.save_content_list(content_list)
        return content_list
def downloader(page):
    """Crawl a single text page by number and return its parsed items.

    :param page: 1-based page number to fetch
    :return: list of item dicts produced by the spider
    """
    spider = QiuBaiSpider(f"https://www.qiushibaike.com/text/page/{page}/")
    return spider.run()
def main():
    """Fan pages 1-13 out across a 5-worker thread pool and print each result."""
    pages = range(1, 14)
    with ThreadPoolExecutor(max_workers=5) as pool:
        # map keeps results in page order even though downloads run concurrently
        for page_result in pool.map(downloader, pages):
            print(page_result)
if __name__ == '__main__':
    # Time the whole crawl so the thread-pool speedup can be compared.
    start = time.time()
    main()
    print(time.time() - start)