并行化处理已经成为了很多工程项目的需求。本文展示了如何使用threadpool模块(支持Python 3.x)将普通的Python程序多线程化。
import requests
import bs4
import time
import threadpool
# Build the list of search keywords: the decimal strings "10" through "19".
data = [str(e) for e in range(10, 20)]
def print_now(request, result=None):
    """Print the ID of a finished work request.

    Intended for use as the ``callback`` given to ``threadpool.makeRequests``.
    The threadpool library invokes callbacks as ``callback(request, result)``,
    so a second parameter has to be accepted; it defaults to ``None`` so that
    existing one-argument calls keep working.

    Args:
        request: Object exposing a ``requestID`` attribute.
        result: Return value of the worker callable; ignored here.
    """
    print(str(request.requestID))
def crawling(link):
    """Request the wandoujia.com search page for one keyword.

    The response is fetched but not used yet; the function returns ``None``.

    Args:
        link: Search keyword; converted with ``str()`` before being sent.
    """
    # Let requests build the query string so that characters such as "&"
    # or "#" in the keyword are percent-encoded instead of corrupting the URL.
    query = {"key": str(link), "source": "search"}
    # Without a timeout a stalled connection would block the calling thread
    # indefinitely.
    response = requests.get(
        "http://www.wandoujia.com/search", params=query, timeout=10
    )
    # other code lines
# Create a pool with 10 worker threads.
pool = threadpool.ThreadPool(10)
# makeRequests(some_callable, list_of_args, callback) builds one work request
# per item in data; print_now is the callback attached to each request.
reqs = threadpool.makeRequests(crawling, data, print_now)
# Queue every request. A plain loop is used because putRequest is called
# only for its side effect, so no throwaway list should be built.
for req in reqs:
    pool.putRequest(req)
# Block until all queued requests have been processed.
pool.wait()