使用python multiprocessing模块多进程创建空文件
#!/usr/bin/env python
#-*- coding: UTF-8 -*-
import os
import multiprocessing
import sys
def run(srcpath, first_dirs=1, second_dirs=1, files_per_dir=1000):
    """Populate *srcpath* with a tree of small files (filesystem load test).

    Creates ``first_dirs`` first-level directories ("tfs0", "tfs1", ...)
    under *srcpath*; inside each, ``second_dirs`` second-level directories
    ("tfs0", ...); inside each of those, ``files_per_dir`` files named
    "tfs_fileN", each containing the single character "1".

    The counts were hard-coded in the original (the comments promised
    100/200 directories while the loops ran once); they are now
    parameters whose defaults reproduce the old behavior.

    Silently does nothing if *srcpath* does not exist.
    """
    if not os.path.exists(srcpath):
        return
    for i in range(first_dirs):
        first_path = os.path.join(srcpath, "tfs" + str(i))
        if not os.path.exists(first_path):
            os.mkdir(first_path)
        for j in range(second_dirs):
            second_path = os.path.join(first_path, "tfs" + str(j))
            if not os.path.exists(second_path):
                os.mkdir(second_path)
            for index in range(files_per_dir):
                file_name = os.path.join(second_path, "tfs_file" + str(index))
                # "with" guarantees the descriptor is released even if the
                # write fails (the original leaked it on error). Writing one
                # byte forces real block allocation; os.mknod would create a
                # truly empty file instead.
                with open(file_name, "w") as f:
                    f.write("1")
if __name__ == "__main__":
    # Usage: script.py <num_subprocesses> <root_path>
    subproc_count = int(sys.argv[1])
    src_path = sys.argv[2]
    # Fan out the requested number of worker processes, all writing under
    # the same root. start() returns immediately; non-daemon children are
    # joined automatically at interpreter exit, so no explicit join here.
    for proc_num in range(1, subproc_count + 1):
        sub_proc = multiprocessing.Process(target=run, args=(src_path,))
        sub_proc.start()
        print("has start subprocess", proc_num)
2、使用进程池
是的,你没有看错,不是线程池。它可以让你跑满多核CPU,而且使用方法非常简单。
注意要用apply_async,如果漏掉async(即误用apply),就变成阻塞版本了。
processes=4是最多并发进程数量。
import multiprocessing
import time
def func(msg, delay=1):
    """Print *msg* three times, pausing *delay* seconds between prints.

    ``delay`` generalizes the original hard-coded 1-second sleep
    (default preserves the old behavior and keeps pool callers working);
    it also lets the function be exercised without real waiting.
    """
    for _ in range(3):
        print(msg)
        time.sleep(delay)
if __name__ == "__main__":
    # At most 4 worker processes run concurrently.
    pool = multiprocessing.Pool(processes=4)
    for i in range(10):
        msg = "hello %d" % (i)
        # apply_async returns immediately; plain apply() would block the
        # submitting loop on each task and serialize the work.
        pool.apply_async(func, (msg, ))
    pool.close()  # no further tasks will be submitted
    pool.join()   # wait for every queued task to finish
    print("Sub-process(es) done.")
3、使用Pool,并需要关注结果
更多的时候,我们不仅需要多进程执行,还需要关注每个进程的执行结果,如下:
import multiprocessing
import time
def func(msg, delay=1):
    """Print *msg* three times with *delay*-second pauses, then return
    ``"done " + msg``.

    ``delay`` parameterizes the original hard-coded 1-second sleep; the
    default keeps the old behavior for pool callers while allowing fast,
    wait-free testing.
    """
    for _ in range(3):
        print(msg)
        time.sleep(delay)
    return "done " + msg
if __name__ == "__main__":
    pool = multiprocessing.Pool(processes=4)
    results = []
    for i in range(10):
        msg = "hello %d" % (i)
        # Keep each AsyncResult handle so the worker's return value can
        # be read back (in submission order) after the pool drains.
        results.append(pool.apply_async(func, (msg, )))
    pool.close()  # no further tasks will be submitted
    pool.join()   # wait for every queued task to finish
    for res in results:
        # get() returns the worker's return value (or re-raises its
        # exception in this process).
        print(res.get())
    print("Sub-process(es) done.")