多线程简单使用案例:
from threading import Thread
import time
def crawl(url, page, delay=3):
    """Simulate crawling one page: pause briefly, then report progress.

    Args:
        url: Link of the page being "crawled"; echoed in the message.
        page: Page index (any value convertible by ``str``) for the message.
        delay: Seconds to sleep, simulating network latency. Defaults to 3,
            matching the original hard-coded pause; pass 0 in tests.
    """
    time.sleep(delay)
    print("\n抓取第" + str(page) + "网页内容。抓取网页链接:" + url)
# Build one worker thread per page (1-10); each thread runs crawl() on its URL.
threads = [
    Thread(
        target=crawl,
        args=(f"http://www.zuowen.com/sucai/diangu/index_{str(page)}.shtml", str(page)),
    )
    for page in range(1, 11)
]
# Kick off every worker first so the sleeps overlap...
for worker in threads:
    worker.start()
# ...then block here until each one has finished.
for worker in threads:
    worker.join()
print("所有线程执行完毕!&^&")
线程池简单使用案例:
from concurrent.futures import ThreadPoolExecutor
import time
def crawl(url, page, delay=3):
    """Simulate crawling one page: pause briefly, then report progress.

    Args:
        url: Link of the page being "crawled"; echoed in the message.
        page: Page index (any value convertible by ``str``) for the message.
        delay: Seconds to sleep, simulating network latency. Defaults to 3,
            matching the original hard-coded pause; pass 0 in tests.
    """
    time.sleep(delay)
    print("\n抓取第" + str(page) + "网页内容。抓取网页链接:" + url)
# Fan the ten crawl jobs out over a 3-worker thread pool; leaving the
# with-block waits for every submitted task to complete.
with ThreadPoolExecutor(max_workers=3) as t:
    for page in range(1, 11):
        # Pass the arguments to submit() directly instead of packing them into
        # a list and unpacking inside a lambda — the indirection added nothing
        # and the lambda-over-loop-variable shape invites late-binding bugs.
        t.submit(
            crawl,
            f"http://www.zuowen.com/xiezuozd/xiezuofa/index_{str(page)}.shtml",
            str(page),
        )
print("所有线程执行完毕!&^&")
多进程简单使用案例:
import time
def crawl(url, page, delay=3):
    """Simulate crawling one page: pause briefly, then report progress.

    Args:
        url: Link of the page being "crawled"; echoed in the message.
        page: Page index (any value convertible by ``str``) for the message.
        delay: Seconds to sleep, simulating network latency. Defaults to 3,
            matching the original hard-coded pause; pass 0 in tests.
    """
    time.sleep(delay)
    print("\n抓取第" + str(page) + "网页内容。抓取网页链接:" + url)
if __name__ == '__main__':
    # BUG FIX: the original snippet used Process without ever importing it,
    # so it raised NameError at runtime. Imported here, inside the __main__
    # guard, as this snippet has no import section of its own.
    from multiprocessing import Process

    # One worker process per page (1-10), each running crawl() on its URL.
    processes = []
    for page in range(1, 11):
        processes.append(
            Process(target=crawl,
                    args=(f"http://www.zuowen.com/sucai/diangu/index_{str(page)}.shtml", str(page))))
    # Start all workers first so they run concurrently...
    for process in processes:
        process.start()
    # ...then wait for every one of them to exit.
    for process in processes:
        process.join()
    print("所有进程执行完毕!&^&")
进程池简单使用案例:
# from concurrent.futures import ProcessPoolExecutor
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
import random
def crawl(url, page, delay=3):
    """Simulate crawling one page: pause briefly, then report progress.

    Args:
        url: Link of the page being "crawled"; echoed in the message.
        page: Page index (any value convertible by ``str``) for the message.
        delay: Seconds to sleep, simulating network latency. Defaults to 3,
            matching the original hard-coded pause; pass 0 in tests.
    """
    time.sleep(delay)
    print("\n抓取第" + str(page) + "网页内容。抓取网页链接:" + url)
if __name__ == '__main__':
    # Fan the ten crawl jobs out over a 3-worker process pool; leaving the
    # with-block waits for every submitted task to complete.
    # (Removed a line of commented-out dead code left over from the
    # thread-pool variant of this snippet.)
    with ProcessPoolExecutor(max_workers=3) as t:
        for page in range(1, 11):
            t.submit(
                crawl,
                f"http://www.zuowen.com/xiezuozd/xiezuofa/index_{str(page)}.shtml",
                str(page),
            )
    print("所有进程执行完毕!&^&")