from concurrent import futures
import time
# Start the wall-clock timer here so the elapsed time printed at the end of the
# script also covers the imports that follow.
start_time = time.time()
from concurrent.futures import ThreadPoolExecutor,as_completed
import requests
from lxml import etree
# Download the novel's index page and build the absolute URL of every link
# matched by the XPath below (presumably one per chapter).
url = 'https://www.soshuw.com/GuiMiZhiZhu/'
# A timeout keeps the script from hanging forever if the site never answers.
response = requests.get(url, timeout=10)
# Force UTF-8 decoding instead of relying on the encoding requests guesses.
response.encoding = 'utf-8'
selector = etree.HTML(response.text)
# First <a> inside each <dd> of the listing; its href is a site-relative path.
s_xpath = "//div[@id='novel50348']/dl/dd/a[1]/@href"
urls = [f'https://www.soshuw.com{path}' for path in selector.xpath(s_xpath)]
def craw(url):
    """Fetch one page and return the title text found in it.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        The first text node matched by the ``read_title`` heading XPath.

    Raises:
        requests.RequestException: If the request fails or times out.
        IndexError: If the page contains no matching title node.
    """
    # A timeout keeps one stalled connection from blocking a worker thread
    # (and therefore the whole pool shutdown) forever.
    response = requests.get(url, timeout=10)
    # Force UTF-8 decoding instead of relying on the encoding requests guesses.
    response.encoding = 'utf-8'
    page = etree.HTML(response.text)
    t_path = "//div[@class='read_title']//h1[1]/text()"
    titles = page.xpath(t_path)
    if not titles:
        # Same exception type as indexing an empty list, but says which page failed.
        raise IndexError(f'no title found at {url}')
    return titles[0]
# Alternative: pool.map() yields results in submission order (took about 40 seconds).
# with ThreadPoolExecutor() as pool:
#     results = pool.map(craw, urls)
#     for result in results:
#         print(result)
# Submit every URL to the pool and print each title as soon as its download
# finishes, then report the total elapsed wall-clock time.
with ThreadPoolExecutor() as pool:
    # Named `tasks` so the `futures` module imported at the top of the file
    # is not shadowed by this list.
    tasks = [pool.submit(craw, url) for url in urls]
    # Alternative: iterate `tasks` directly to print in submission order
    # (took about 41 seconds).
    # for future in tasks:
    #     print(future.result())
    for future in as_completed(tasks):
        # result() re-raises any exception raised inside craw().
        print(future.result())  # about 41 seconds in total
print(time.time() - start_time)
# ThreadPoolExecutor usage syntax
# (Scraped page footer: latest recommended article published 2024-02-17 21:56:51.)