Python多进程之间通信的坑
最近有使用到Python的多进程功能,不用不知道,一用就发现废了。
主处理的多个进程来进行运算,另外开一个进程来进行数据写入文件。
据我的所知,可以使用Queue来进行通信,结果是有下面错误的示范。
错误示范
# 错误示范
from queue import Queue
from multiprocessing import Process
from random import randint
import argparse
def multi(test_data, q):
if test_data % 2 == 0:
q.put(test_data)
def write_result(q):
while True:
if q.qsize() > 0:
ret_data = q.get()
print(ret_data)
if ret_data == 'stop':
break
with open('text.txt', 'a') as f:
f.write(''.join([str(ret_data), '\n']))
def main(procossor):
q = Queue(1000)
w_p = Process(target=write_result, args=(q, ))
w_p.start()
test_list = [randint(0, 100) for i in range(10000)]
while len(test_list) > 0:
for i in range(procossor):
if len(test_list) > 0:
test_data = test_list.pop()
p = Process(target=multi, args=(test_data, q, ))
p.start()
else:
continue
p.join()
q.put('stop')
w_p.join()
if __name__ == '__main__':
with open('text.txt', 'w') as f:
pass
parser = argparse.ArgumentParser()
parser.add_argument("-p", "--PROCESOR", default=100, type=int, help="use procesor")
args = parser.parse_args()
main(args.PROCESOR)
错误示范是使用了queue库里的Queue。源码如下:
class Queue:
'''Create a queue object with a given maximum size.
If maxsize is <= 0, the queue size is infinite.
'''
def __init__(self, maxsize=0):
self.maxsize = maxsize
self._init(maxsize)
# mutex must be held whenever the queue is mutating. All methods
# that acquire mutex must release it before returning. mutex
# is shared between the three conditions, so acquiring and
# releasing the conditions also acquires and releases mutex.
self.mutex = threading.Lock()
# Notify not_empty whenever an item is added to the queue; a
# thread waiting to get is notified then.
self.not_empty = threading.Condition(self.mutex)
# Notify not_full whenever an item is removed from the queue;
# a thread waiting to put is notified then.
self.not_full = threading.Condition(self.mutex)
# Notify all_tasks_done whenever the number of unfinished tasks
# drops to zero; thread waiting to join() is notified to resume
self.all_tasks_done = threading.Condition(self.mutex)
self.unfinished_tasks = 0
def task_done(self):
'''Indicate that a formerly enqueued task is complete.
Used by Queue consumer threads. For each get() used to fetch a task,
a subsequent call to task_done() tells the queue that the processing
on the task is complete.
If a join() is currently blocking, it will resume when all items
have been processed (meaning that a task_done() call was received
for every item that had been put() into the queue).
Raises a ValueError if called more times than there were items
placed in the queue.
'''
with self.all_tasks_done:
unfinished = self.unfinished_tasks - 1
if unfinished <= 0:
if unfinished < 0:
raise ValueError('task_done() called too many times')
self.all_tasks_done.notify_all()
self.unfinished_tasks = unfinished
def join(self):
'''Blocks until all items in the Queue have been gotten and processed.
The count of unfinished tasks goes up whenever an item is added to the
queue. The count goes down whenever a consumer thread calls task_done()
to indicate the item was retrieved and all work on it is complete.
When the count of unfinished tasks drops to zero, join() unblocks.
'''
with self.all_tasks_done:
while self.unfinished_tasks:
self.all_tasks_done.wait()
def qsize(self):
'''Return the approximate size of the queue (not reliable!).'''
with self.mutex:
return self._qsize()
def empty(self):
'''Return True if the queue is empty, False otherwise (not reliable!).
This method is likely to be removed at some point. Use qsize() == 0
as a direct substitute, but be aware that either approach risks a race
condition where a queue can grow before the result of empty() or
qsize() can be used.
To create code that needs to wait for all queued tasks to be
completed, the preferred technique is to use the join() method.
'''
with self.mutex:
return not self._qsize()
def full(self):
'''Return True if the queue is full, False otherwise (not reliable!).
This method is likely to be removed at some point. Use qsize() >= n
as a direct substitute, but be aware that either approach risks a race
condition where a queue can shrink before the result of full() or
qsize() can be used.
'''
with self.mutex:
return 0 < self.maxsize <= self._qsize()
def put(self, item, block=True, timeout=None):
'''Put an item into the queue.
If optional args 'block' is true and 'timeout' is None (the default),
block if necessary until a free slot is available. If 'timeout' is
a non-negative number, it blocks at most 'timeout' seconds and raises
the Full exception if no free slot was available within that time.
Otherwise ('block' is false), put an item on the queue if a free slot
is immediately available, else raise the Full exception ('timeout'
is ignored in that case).
'''
with self.not_full:
if self.maxsize > 0:
if not block:
if self._qsize() >= self.maxsize:
raise Full
elif timeout is None:
while self._qsize() >= self.maxsize:
self.not_full.wait()
elif timeout < 0:
raise ValueError("'timeout' must be a non-negative number")
else:
endtime = time() + timeout
while self._qsize() >= self.maxsize:
remaining = endtime - time()
if remaining <= 0.0:
raise Full
self.not_full.wait(remaining)
self._put(item)
self.unfinished_tasks += 1
self.not_empty.notify()
def get(self, block=True, timeout=None):
'''Remove and return an item from the queue.
If optional args 'block' is true and 'timeout' is None (the default),
block if necessary until an item is available. If 'timeout' is
a non-negative number, it blocks at most 'timeout' seconds and raises
the Empty exception if no item was available within that time.
Otherwise ('block' is false), return an item if one is immediately
available, else raise the Empty exception ('timeout' is ignored
in that case).
'''
with self.not_empty:
if not block:
if not self._qsize():
raise Empty
elif timeout is None:
while not self._qsize():
self.not_empty.wait()
elif timeout < 0:
raise ValueError("'timeout' must be a non-negative number")
else:
endtime = time() + timeout
while not self._qsize():
remaining = endtime - time()
if remaining <= 0.0:
raise Empty
self.not_empty.wait(remaining)
item = self._get()
self.not_full.notify()
return item
def put_nowait(self, item):
'''Put an item into the queue without blocking.
Only enqueue the item if a free slot is immediately available.
Otherwise raise the Full exception.
'''
return self.put(item, block=False)
def get_nowait(self):
'''Remove and return an item from the queue without blocking.
Only get an item if one is immediately available. Otherwise
raise the Empty exception.
'''
return self.get(block=False)
# Override these methods to implement other queue organizations
# (e.g. stack or priority queue).
# These will only be called with appropriate locks held
# Initialize the queue representation
def _init(self, maxsize):
self.queue = deque()
def _qsize(self):
return len(self.queue)
# Put a new item in the queue
def _put(self, item):
self.queue.append(item)
# Get an item from the queue
def _get(self):
return self.queue.popleft()
而multiprocessing里也有一个Queue,使用multiprocessing.Queue
调用。
源码如下:
class Queue(object):
def __init__(self, maxsize=0, *, ctx):
if maxsize <= 0:
# Can raise ImportError (see issues #3770 and #23400)
from .synchronize import SEM_VALUE_MAX as maxsize
self._maxsize = maxsize
self._reader, self._writer = connection.Pipe(duplex=False)
self._rlock = ctx.Lock()
self._opid = os.getpid()
if sys.platform == 'win32':
self._wlock = None
else:
self._wlock = ctx.Lock()
self._sem = ctx.BoundedSemaphore(maxsize)
# For use by concurrent.futures
self._ignore_epipe = False
self._after_fork()
if sys.platform != 'win32':
register_after_fork(self, Queue._after_fork)
def __getstate__(self):
context.assert_spawning(self)
return (self._ignore_epipe, self._maxsize, self._reader, self._writer,
self._rlock, self._wlock, self._sem, self._opid)
def __setstate__(self, state):
(self._ignore_epipe, self._maxsize, self._reader, self._writer,
self._rlock, self._wlock, self._sem, self._opid) = state
self._after_fork()
def _after_fork(self):
debug('Queue._after_fork()')
self._notempty = threading.Condition(threading.Lock())
self._buffer = collections.deque()
self._thread = None
self._jointhread = None
self._joincancelled = False
self._closed = False
self._close = None
self._send_bytes = self._writer.send_bytes
self._recv_bytes = self._reader.recv_bytes
self._poll = self._reader.poll
def put(self, obj, block=True, timeout=None):
assert not self._closed, "Queue {0!r} has been closed".format(self)
if not self._sem.acquire(block, timeout):
raise Full
with self._notempty:
if self._thread is None:
self._start_thread()
self._buffer.append(obj)
self._notempty.notify()
def get(self, block=True, timeout=None):
if block and timeout is None:
with self._rlock:
res = self._recv_bytes()
self._sem.release()
else:
if block:
deadline = time.monotonic() + timeout
if not self._rlock.acquire(block, timeout):
raise Empty
try:
if block:
timeout = deadline - time.monotonic()
if not self._poll(timeout):
raise Empty
elif not self._poll():
raise Empty
res = self._recv_bytes()
self._sem.release()
finally:
self._rlock.release()
# unserialize the data after having released the lock
return _ForkingPickler.loads(res)
def qsize(self):
# Raises NotImplementedError on Mac OSX because of broken sem_getvalue()
return self._maxsize - self._sem._semlock._get_value()
def empty(self):
return not self._poll()
def full(self):
return self._sem._semlock._is_zero()
def get_nowait(self):
return self.get(False)
def put_nowait(self, obj):
return self.put(obj, False)
def close(self):
self._closed = True
try:
self._reader.close()
finally:
close = self._close
if close:
self._close = None
close()
def join_thread(self):
debug('Queue.join_thread()')
assert self._closed, "Queue {0!r} not closed".format(self)
if self._jointhread:
self._jointhread()
def cancel_join_thread(self):
debug('Queue.cancel_join_thread()')
self._joincancelled = True
try:
self._jointhread.cancel()
except AttributeError:
pass
def _start_thread(self):
debug('Queue._start_thread()')
# Start thread which transfers data from buffer to pipe
self._buffer.clear()
self._thread = threading.Thread(
target=Queue._feed,
args=(self._buffer, self._notempty, self._send_bytes,
self._wlock, self._writer.close, self._ignore_epipe,
self._on_queue_feeder_error, self._sem),
name='QueueFeederThread'
)
self._thread.daemon = True
debug('doing self._thread.start()')
self._thread.start()
debug('... done self._thread.start()')
if not self._joincancelled:
self._jointhread = Finalize(
self._thread, Queue._finalize_join,
[weakref.ref(self._thread)],
exitpriority=-5
)
# Send sentinel to the thread queue object when garbage collected
self._close = Finalize(
self, Queue._finalize_close,
[self._buffer, self._notempty],
exitpriority=10
)
@staticmethod
def _finalize_join(twr):
debug('joining queue thread')
thread = twr()
if thread is not None:
thread.join()
debug('... queue thread joined')
else:
debug('... queue thread already dead')
@staticmethod
def _finalize_close(buffer, notempty):
debug('telling queue thread to quit')
with notempty:
buffer.append(_sentinel)
notempty.notify()
@staticmethod
def _feed(buffer, notempty, send_bytes, writelock, close, ignore_epipe,
onerror, queue_sem):
debug('starting thread to feed data to pipe')
nacquire = notempty.acquire
nrelease = notempty.release
nwait = notempty.wait
bpopleft = buffer.popleft
sentinel = _sentinel
if sys.platform != 'win32':
wacquire = writelock.acquire
wrelease = writelock.release
else:
wacquire = None
while 1:
try:
nacquire()
try:
if not buffer:
nwait()
finally:
nrelease()
try:
while 1:
obj = bpopleft()
if obj is sentinel:
debug('feeder thread got sentinel -- exiting')
close()
return
# serialize the data before acquiring the lock
obj = _ForkingPickler.dumps(obj)
if wacquire is None:
send_bytes(obj)
else:
wacquire()
try:
send_bytes(obj)
finally:
wrelease()
except IndexError:
pass
except Exception as e:
if ignore_epipe and getattr(e, 'errno', 0) == errno.EPIPE:
return
# Since this runs in a daemon thread the resources it uses
# may be become unusable while the process is cleaning up.
# We ignore errors which happen after the process has
# started to cleanup.
if is_exiting():
info('error in queue thread: %s', e)
return
else:
# Since the object has not been sent in the queue, we need
# to decrease the size of the queue. The error acts as
# if the object had been silently removed from the queue
# and this step is necessary to have a properly working
# queue.
queue_sem.release()
onerror(e, obj)
@staticmethod
def _on_queue_feeder_error(e, obj):
"""
Private API hook called when feeding data in the background thread
raises an exception. For overriding by concurrent.futures.
"""
import traceback
traceback.print_exc()
正确解法
# from queue import Queue
from multiprocessing import Process, Queue # 使用multiprocessing里的Queue
from random import randint
import argparse
def multi(test_data, q):
if test_data % 2 == 0:
q.put(test_data)
def write_result(q):
while True:
if q.qsize() > 0:
ret_data = q.get()
print(ret_data)
if ret_data == 'stop':
break
with open('text.txt', 'a') as f:
f.write(''.join([str(ret_data), '\n']))
def main(procossor):
q = Queue(100)
w_p = Process(target=write_result, args=(q, ))
w_p.start()
test_list = [randint(0, 100) for i in range(10000)]
while len(test_list) > 0:
for i in range(procossor):
if len(test_list) > 0:
test_data = test_list.pop()
p = Process(target=multi, args=(test_data, q, ))
p.start()
else:
continue
p.join()
q.put('stop')
w_p.join()
if __name__ == '__main__':
with open('text.txt', 'w') as f:
pass
parser = argparse.ArgumentParser()
parser.add_argument("-p", "--PROCESOR", default=100, type=int, help="use procesor")
args = parser.parse_args()
main(args.PROCESOR)