Here's a version that works so far.
import multiprocessing
import json
import time
import fcntl


def init():
    # Start each run from a known, empty result file.
    with open('list.txt', 'w') as f:
        f.write(json.dumps({'sids': {}}))


def list_tmp(data=None):
    if data is None:
        # Read path: take the lock so we never see a half-written file.
        # The lock is released when the with-block closes the file.
        with open('list.txt', 'r') as f:
            fcntl.flock(f, fcntl.LOCK_EX)
            data = f.read()
        return json.loads(data)
    else:
        # Write path: read the current contents, update them, and rewrite the
        # file, all while holding the lock so concurrent writers don't lose updates.
        with open('list.txt', 'r+') as f:
            fcntl.flock(f, fcntl.LOCK_EX)
            text = f.read()
            text = json.loads(text)
            text['sids'][data[0]] = data[1]
            f.seek(0)      # read() left the offset at EOF; go back to the start
            f.truncate()   # drop leftover bytes in case the new JSON is shorter
            f.write(json.dumps(text))


def generate(sid):
    target = 'http://www.baidu.com'
    data = (sid, target)
    list_tmp(data)
    time.sleep(1)


if __name__ == '__main__':
    init()
    processes = []
    for i in range(100):
        p = multiprocessing.Process(target=generate, args=(i,))
        processes.append(p)
        p.start()
    for p in processes:
        p.join()
    with open('list.txt', 'r') as f:
        data = f.read()
    data = json.loads(data)
    print(len(data.get('sids')))
The problem with the earlier code was that it read the old data outside the locked write section, so each process started from data that was already stale or incomplete. Also, seek and truncate clearly matter here, though I don't know why...
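As for why seek and truncate matter: f.read() leaves the file offset at the end, so seek(0) is needed to overwrite from the start rather than append, and truncate() removes whatever old bytes remain when the new JSON is shorter than the previous one. A minimal standalone sketch (using a throwaway demo.txt rather than the list.txt above) of the corruption you get without truncate():

import json

# Write a fairly long JSON document first.
with open('demo.txt', 'w') as f:
    f.write(json.dumps({'sids': {str(i): 'http://www.baidu.com' for i in range(5)}}))

# Rewrite it with something shorter, deliberately skipping truncate().
with open('demo.txt', 'r+') as f:
    old = json.loads(f.read())   # read() leaves the file offset at EOF
    f.seek(0)                    # rewind, otherwise write() would append after the old data
    f.write(json.dumps({'sids': {}}))
    # no f.truncate() here, so the tail of the old document stays on disk

with open('demo.txt', 'r') as f:
    print(f.read())  # '{"sids": {}}' followed by leftover bytes -> json.loads() would fail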