但在线程中调用 readlines() 时,就已经把整个文件一次性全部读进内存了,
多线程共同调用同一个文件对象的io方法会造成偏移量错误的问题,你需要锁。(python3 有 os.pread 函数,这个是原子操作,可以不用锁)。
这个问题使用线程的效率价值在requests处,200M的文件读取还轮不到谈文件读取的多线程加速。另外要注意:GIL 在阻塞 IO 期间会被释放,所以 Python 多线程仍然可以让多个 IO 等待相互重叠、提高 IO 吞吐;GIL 限制的是 CPU 密集型代码的并行,而不是 IO。
import requests
import multiprocessing
import sys
# Path of the input file read by gen_urls(): one URL per line.
urlfile = 'url.txt'
def gen_urls(filename):
    """Yield URLs from *filename*, one per line, lazily.

    Each yielded value has its surrounding newline characters stripped.
    Blank lines are skipped: the original readline() loop yielded '' for
    them, and an empty string would later be passed to requests.get(),
    which fails on an empty URL.
    """
    with open(filename, 'r') as f:
        # Iterating the file object streams the lines one at a time,
        # replacing the hand-rolled readline()/while loop.
        for line in f:
            url = line.strip('\n')
            if url:
                yield url
def check_url(url):
    """GET *url* and print "<status>: <url>".

    A timeout keeps a dead or unroutable host from hanging the worker
    forever (requests.get blocks indefinitely without one).  Exceptions
    are caught and reported here because Pool.apply_async silently
    discards any exception raised in a worker unless the AsyncResult
    is explicitly inspected.
    """
    try:
        res = requests.get(url, timeout=10)
        print("{s}: {u}".format(s=res.status_code, u=url))
    except requests.RequestException as e:
        # Report the failure instead of letting it vanish in the pool.
        print("ERR: {u} ({e})".format(u=url, e=e))
def run_in_pool():
    """Check every URL from *urlfile* concurrently via a process pool.

    Pool.apply_async discards a worker's exception unless its
    AsyncResult is inspected, so the results are kept and .get() is
    called on each one after the pool drains — failures are printed
    rather than silently lost, but do not abort the whole run.
    """
    pool = multiprocessing.Pool(processes=8)
    results = []
    for url in gen_urls(urlfile):
        print("To pool: {}".format(url))
        results.append(pool.apply_async(check_url, (url,)))
    pool.close()
    pool.join()
    for r in results:
        try:
            # .get() re-raises any exception that occurred in the worker.
            r.get()
        except Exception as e:
            print("worker failed: {}".format(e))
    print("done")
def run_in_one():
    """Check each URL from *urlfile* sequentially, in the main process."""
    for candidate in gen_urls(urlfile):
        print("To pool: {}".format(candidate))
        check_url(candidate)
    print("done")
if __name__ == "__main__":
if sys.argv.pop() == "m":
run_in_pool()
else:
run_in_one()