如何使用 threading 模块呢?
比如说,爬取 cnblogs:
import threading
import time
import requests
# Addresses of the 50 listing pages to fetch, numbered 1 through 50.
# NOTE(review): the page number is carried in the URL fragment (the part
# after '#'). HTTP clients do not transmit the fragment to the server, so
# every entry presumably retrieves the same document - confirm the site's
# real pagination URL before relying on the results.
urls = [f"https://www.cnblogs.com/#p{page}" for page in range(1, 51)]
def craw(url, timeout=10.0):
    """Fetch ``url`` with an HTTP GET and print the URL and body length.

    Args:
        url: Address to request.
        timeout: Seconds forwarded to ``requests.get`` as its ``timeout``
            argument. Without a timeout a stalled connection would block
            the caller indefinitely; in the threaded run that would also
            keep ``thread.join()`` from ever returning.

    Exceptions raised by ``requests.get`` (including a timeout) are not
    caught here and propagate to the caller.
    """
    response = requests.get(url, timeout=timeout)
    # ``response.text`` is already a ``str``; no extra conversion is needed.
    print(url, len(response.text))
def single_thread():
    """Fetch every entry of ``urls`` sequentially in the calling thread.

    Prints a begin marker, calls ``craw`` once per URL in list order, and
    prints an end marker after the last call returns.
    """
    # The markers name this function; the original printed "multi_thread",
    # which made the sequential run's output indistinguishable from the
    # threaded run's.
    print("single_thread begin")
    for url in urls:
        craw(url)
    print("single_thread end")
def multi_thread():
    """Fetch every entry of ``urls`` concurrently, one thread per URL.

    All threads are created first, then started, then joined, so the
    function returns only after every thread has finished its ``craw``
    call.
    """
    print("multi_thread begin")
    # ``args`` must be a tuple, hence the trailing comma after ``link``.
    workers = [threading.Thread(target=craw, args=(link,)) for link in urls]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print("multi_thread end")
if __name__ == "__main__":
    # Time the sequential run and then the threaded run, printing the
    # elapsed seconds for each. ``time.perf_counter`` is used instead of
    # ``time.time`` because it is monotonic: a system clock adjustment
    # during a run cannot distort the measured interval.
    start = time.perf_counter()
    single_thread()
    end = time.perf_counter()
    print("single_thread cost :", end - start, "second")

    start = time.perf_counter()
    multi_thread()
    end = time.perf_counter()
    print("multi_thread cost :", end - start, "second")