Python: a process pool that automatically retries tasks that time out

The idea: subclass multiprocessing.pool.Pool so that every worker reports, through an extra queue, which task it is currently running and when it started. A monitor thread in the parent polls these records, kills any worker whose current task has exceeded the timeout (via SIGKILL, so POSIX only), and re-queues the task under its original job id, up to retrycount times before failing the result. The code targets Python 2 (it uses Queue, long and itertools.count().next()).

#!coding=utf-8

import os
import Queue
import threading
import time
import signal
import itertools

from multiprocessing.pool import Pool, ApplyResult
from multiprocessing import Process, cpu_count, TimeoutError
from multiprocessing.util import Finalize, debug

job_counter = itertools.count()

RUN = 0
CLOSE = 1
TERMINATE = 2

#
# Code run by worker processes
#

# Lock guarding the shared child_jobs bookkeeping across handler threads.
lock = threading.RLock()


class MaybeEncodingError(Exception):
    """Wraps possible unpickleable errors, so they can be
    safely sent through the socket."""

    def __init__(self, exc, value):
        self.exc = repr(exc)
        self.value = repr(value)
        super(MaybeEncodingError, self).__init__(self.exc, self.value)

    def __str__(self):
        return "Error sending result: '%s'. Reason: '%s'" % (self.value,
                                                             self.exc)

    def __repr__(self):
        return "<MaybeEncodingError: %s>" % str(self)

def worker(inqueue, outqueue, childinfoqueue, initializer=None, initargs=(), maxtasks=None):
    assert maxtasks is None or (type(maxtasks) in (int, long) and maxtasks > 0)
    put = outqueue.put
    get = inqueue.get
    # queue used to report task info back to the parent process
    childinfoput = childinfoqueue.put
    if hasattr(inqueue, '_writer'):
        inqueue._writer.close()
        outqueue._reader.close()
    if initializer is not None:
        initializer(*initargs)

    completed = 0
    while maxtasks is None or (maxtasks and completed < maxtasks):
        try:
            task = get()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            break

        if task is None:
            debug('worker got sentinel -- exiting')
            break

        job, i, func, args, kwds = task
        # tell the parent which task this worker is about to run
        pid = os.getpid()
        childinfoput((job, i, func, args, kwds, pid))
        try:
            result = (True, func(*args, **kwds))
        except Exception as e:
            result = (False, e)
        try:
            put((job, i, result))
        except Exception as e:
            wrapped = MaybeEncodingError(e, result[1])
            debug("Possible encoding error while sending result: %s" % (
                wrapped))
            put((job, i, (False, wrapped)))

        task = job = result = func = args = kwds = None
        completed += 1
    debug('worker exiting after %d tasks' % completed)

#
# Class representing a process pool
#

class MyPool(Pool):

    def __init__(self, processes=None, timeout=None, retrycount=None,
                 initializer=None, initargs=(), maxtasksperchild=None):
        self._setup_queues()
        self._taskqueue = Queue.Queue()
        self._cache = {}
        self._state = RUN
        self._maxtasksperchild = maxtasksperchild
        self._initializer = initializer
        self._initargs = initargs
        # job id -> [current task info or None, retry count so far]
        self._child_jobs = {}
        self.timeout = timeout
        self.retrycount = retrycount
        self.job_retry_cache = {}

        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        if processes < 1:
            raise ValueError("Number of processes must be at least 1")

        if initializer is not None and not hasattr(initializer, '__call__'):
            raise TypeError('initializer must be a callable')

        self._processes = processes
        self._pool = []
        self._repopulate_pool()

        self._worker_handler = threading.Thread(
            target=MyPool._handle_workers,
            args=(self,)
        )
        self._worker_handler.daemon = True
        self._worker_handler._state = RUN
        self._worker_handler.start()

        self._task_handler = threading.Thread(
            target=Pool._handle_tasks,
            args=(self._taskqueue, self._quick_put, self._outqueue,
                  self._pool, self._cache)
        )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        self._result_handler = threading.Thread(
            target=Pool._handle_results,
            args=(self._outqueue, self._quick_get, self._cache)
        )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        # handle the task info sent by the worker processes
        self._childinfo_handler = threading.Thread(
            target=MyPool._handle_childinfo,
            args=(self._childinfoqueue, self._child_jobs)
        )
        self._childinfo_handler.daemon = True
        self._childinfo_handler._state = RUN
        self._childinfo_handler.start()

        # retry tasks that have timed out
        self._timeoutjob_handler = threading.Thread(
            target=MyPool._handle_timeoutjob,
            args=(self._child_jobs, self._taskqueue, self._cache,
                  self.timeout, self.retrycount)
        )
        self._timeoutjob_handler.daemon = True
        self._timeoutjob_handler._state = RUN
        self._timeoutjob_handler.start()

        self._terminate = Finalize(
            self, self._terminate_pool,
            args=(self._taskqueue, self._inqueue, self._outqueue, self._pool,
                  self._worker_handler, self._task_handler,
                  self._result_handler, self._cache),
            exitpriority=15
        )

    @staticmethod
    def _handle_workers(pool):
        thread = threading.current_thread()

        # Keep maintaining workers until the cache gets drained, unless the pool
        # is terminated.
        while thread._state == RUN or (pool._cache and thread._state != TERMINATE):
            pool._maintain_pool()
            time.sleep(0.1)
        # send sentinel to stop workers
        pool._taskqueue.put(None)
        pool._childinfoqueue.put(None)
        pool._timeoutjob_handler._state = TERMINATE
        debug('worker handler exiting')

    @staticmethod
    def _handle_childinfo(childinfoqueue, child_jobs):
        get = childinfoqueue.get
        while 1:
            try:
                task = get()
            except (IOError, EOFError):
                debug('childinfo handler got EOFError/IOError -- exiting')
                return

            if task is None:
                debug('childinfo handler got sentinel')
                break

            job, i, func, args, kwds, pid = task
            ctime = time.time()
            with lock:
                # record which worker picked the task up and when, keeping
                # the retry counter accumulated so far
                if job not in child_jobs:
                    continue
                print("task-{} picked up by worker {}".format(job, pid))
                child_jobs[job] = [(i, func, args, kwds, pid, ctime), child_jobs[job][-1]]

    @staticmethod
    def _handle_timeoutjob(child_jobs, task_queue, cache, timeout, retrycount):
        thread = threading.current_thread()
        while thread._state == RUN:
            with lock:
                for job, record in child_jobs.items():
                    if record[0] is None:
                        continue
                    i, func, args, kwds, pid, ctime = record[0]
                    cretrycount = record[1]
                    # If a worker's current task has run past the timeout, kill
                    # that worker (POSIX only) and re-put the task, under its
                    # original job id, onto task_queue for another worker to
                    # pick up. When a task finishes normally, its record in
                    # child_jobs is released by MyApplyResult._set.
                    if time.time() - ctime > timeout:
                        print("task-{} in worker {} timed out; killing the worker"
                              " and retrying (retries so far: {})".format(job, pid, cretrycount))
                        os.kill(pid, signal.SIGKILL)
                        if cretrycount == retrycount:
                            print("task-{} has exhausted its retries; cancelling it".format(job))
                            cache[job]._set(i, (False, Exception("task failed")))
                            continue
                        task_queue.put(([(job, i, func, args, kwds)], None))
                        # Clear the stale record so the same timeout is not
                        # triggered again before a new worker picks the task up.
                        child_jobs[job] = [None, cretrycount + 1]
            time.sleep(0.5)

    def _repopulate_pool(self):
        """Bring the number of pool processes up to the specified number,
        for use after reaping workers which have exited.
        """
        for i in range(self._processes - len(self._pool)):
            w = self.Process(target=worker,
                             args=(self._inqueue, self._outqueue, self._childinfoqueue,
                                   self._initializer,
                                   self._initargs, self._maxtasksperchild)
                             )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()
            debug('added worker')
            print('added worker')

    def _setup_queues(self):
        from multiprocessing.queues import SimpleQueue
        self._inqueue = SimpleQueue()
        self._outqueue = SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv
        # extra queue through which workers report the task they are running
        self._childinfoqueue = SimpleQueue()

    def apply_async(self, func, args=(), kwds={}, callback=None):
        '''
        Asynchronous equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        result = MyApplyResult(self._cache, self._child_jobs, callback)
        self._taskqueue.put(([(result._job, None, func, args, kwds)], None))
        return result

    def join(self):
        debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)
        self._worker_handler.join()
        self._task_handler.join()
        self._result_handler.join()
        self._childinfo_handler.join()
        self._timeoutjob_handler.join()
        for p in self._pool:
            p.join()

class MyApplyResult(ApplyResult):

    def __init__(self, cache, child_jobs, callback):
        self._cond = threading.Condition(threading.Lock())
        self._job = job_counter.next()
        self._cache = cache
        self._ready = False
        self._callback = callback
        self._child_jobs = child_jobs
        cache[self._job] = self
        # register the job: no task info yet, zero retries so far
        with lock:
            child_jobs[self._job] = [None, 0]

    def _set(self, i, obj):
        self._success, self._value = obj
        if self._callback and self._success:
            self._callback(self._value)
        self._cond.acquire()
        try:
            self._ready = True
            self._cond.notify()
        finally:
            self._cond.release()
        del self._cache[self._job]
        # the task is done (or cancelled), so drop its retry bookkeeping
        with lock:
            print("task-{} finished".format(self._job))
            del self._child_jobs[self._job]
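To exercise the pool, here is a minimal usage sketch appended to the same module. It is my own example, not part of the original post; the function slow_task and the parameter values are illustrative. The task randomly sleeps either well under or well over the timeout, so some runs get killed and retried up to retrycount times before the result is failed:

import random

def slow_task(n):
    # sleeps 1s or 10s; with timeout=5 the 10s case is killed and retried
    time.sleep(random.choice([1, 10]))
    return n * n

if __name__ == '__main__':
    pool = MyPool(processes=4, timeout=5, retrycount=2)
    results = [pool.apply_async(slow_task, (n,)) for n in range(8)]
    pool.close()
    pool.join()
    for r in results:
        try:
            print(r.get())
        except Exception as e:
            print("task failed after all retries: %s" % e)

Note that the reliance on SIGKILL and on Python 2 pool internals (the multiprocessing.queues.SimpleQueue location, the private _state attributes of the handler threads) makes this approach POSIX- and Python 2-specific; the standard-library Pool only supports timeouts on the result side (AsyncResult.get(timeout)) and never kills or retries a worker.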
