07、进程初步使用 -- 线程池的使用 concurrent模块【进程池使用预热】_runtimeerror: cannot schedule new futures after in-CSDN博客

本文链接：https://blog.csdn.net/weixin_41097516/article/details/109232568

01、concurrent -- Python中用于线程池、进程池（多线程、多进程）并发编程的高层封装包

--concurrent：在python3.2中引入的一个包，主要是做进程池  线程池，能帮助理解协程。

--为什么需要线程池，进程池？
    --并发量的控制：使用semaphore不是非常好用，需要自己去维护和仔细审查逻辑并校验
    --concurrent不仅用于简单控制线程/进程数量，还可以用于其他方面。例如：主线程可以获取某一个线程的状态和返回值，并在某个子线程结束以后进行专门的处理。此时使用concurrent就非常合适
    --futures：多线程  多进程使用接口基本一致，切换非常平滑

--concurrent使用线程池：基本调用

from concurrent.futures import ThreadPoolExecutor
import time


def get_html(times):
    """Simulate downloading a page by sleeping for ``times`` seconds.

    Prints a success line once the sleep is over and returns ``times``
    unchanged, so the value can be read back from the task's future.
    """
    time.sleep(times)
    message = "get pages {} success".format(times)
    print(message)
    return times

# Use the pool as a context manager so it is always shut down (and its worker
# threads released) when the demo finishes, even if one of the calls raises.
with ThreadPoolExecutor(max_workers=2) as executor:
    # Basic usage of a thread pool:
    # submit() hands a callable and its arguments to the pool and returns at
    # once. task1 and task2 are Future objects; their methods expose the
    # task's state and, once it has finished, its return value.
    task1 = executor.submit(get_html, 3)
    task2 = executor.submit(get_html, 2)

    # Inspecting task state and return values.
    # done() reports whether the task has finished (it says nothing about
    # success). submit() does not block, so right after submission this is
    # normally still False.
    print(task1.done())
    # cancel() returns False when the task is already running or finished.
    print(task2.cancel())
    time.sleep(3)
    print(task1.done())

    # result() blocks until the task has finished and returns its value.
    print(task1.result())

--concurrent使用线程池：使用列表推导式批量提交多个任务，并使用as_completed按完成顺序取出结果
from concurrent.futures import ThreadPoolExecutor, as_completed
import time


def get_html(times):
    """Pretend to fetch a page and hand ``times`` straight back.

    The sleep is zero-length, so the demo finishes immediately; ``times`` is
    only used for the printed message and the return value.
    """
    time.sleep(0)
    template = "get pages {} success"
    print(template.format(times))
    return times

# Use the pool as a context manager so it is always shut down (and its worker
# threads released) once every result has been consumed.
with ThreadPoolExecutor(max_workers=2) as executor:
    # Batch submission.
    # More tasks than max_workers may be submitted at once: the pool never
    # runs more than max_workers threads, and the extra tasks wait in its
    # queue until a worker becomes free.
    urls = [3, 2, 4]
    all_tasks = [executor.submit(get_html, url) for url in urls]

    # as_completed() is a generator that yields each future as soon as it has
    # finished, blocking in between until the next one completes. The loop
    # therefore sees every result, in completion order rather than in
    # submission order.
    for future in as_completed(all_tasks):
        print(future.result())

--concurrent使用线程池：使用ThreadPoolExecutor对象的map方法批量提交任务

--代码示例：
from concurrent.futures import ThreadPoolExecutor, as_completed
import time


def get_html(times):
    """Pretend to fetch a page and return the sum of the first two items.

    ``times`` must be indexable at positions 0 and 1; the whole object is
    printed before the two items are added together.
    """
    time.sleep(0)
    print("get pages {} success".format(times))
    first, second = times[0], times[1]
    return first + second

# Use the pool as a context manager so it is always shut down (and its worker
# threads released) once every result has been consumed.
with ThreadPoolExecutor(max_workers=2) as executor:
    # Batch submission.
    # More tasks than max_workers may be submitted at once: the pool never
    # runs more than max_workers threads, and the extra tasks wait in its
    # queue until a worker becomes free.
    urls = [[3, 6], [2, 8], [4, 9]]

    # executor.map() calls get_html once per element of urls; each inner list
    # is passed as the single argument of one call.
    # The iterator yields the return value of each call (not a Future),
    # blocking until that value is available, and always in the same order as
    # the input list.
    for result in executor.map(get_html, urls):
        print(result)

02、使用wait函数等待程序执行完毕，才结束主线程

from concurrent.futures import ThreadPoolExecutor, as_completed, wait, ALL_COMPLETED
import time


def get_html(times):
    """Pretend to fetch a page, then add up the first two entries of ``times``.

    The success message is printed first; the result is ``times[0] + times[1]``.
    """
    time.sleep(0)
    status_line = "get pages {} success".format(times)
    print(status_line)
    left = times[0]
    right = times[1]
    return left + right

# Use the pool as a context manager so it is always shut down (and its worker
# threads released) when the demo finishes.
with ThreadPoolExecutor(max_workers=2) as executor:
    # Batch submission.
    # More tasks than max_workers may be submitted at once: the pool never
    # runs more than max_workers threads, and the extra tasks wait in its
    # queue until a worker becomes free.
    urls = [[3, 6], [2, 8], [4, 9]]

    all_tasks = [executor.submit(get_html, url) for url in urls]

    # wait() blocks the main thread until the condition selected by
    # return_when is met. The accepted constants are:
    #   FIRST_COMPLETED = 'FIRST_COMPLETED'
    #   FIRST_EXCEPTION = 'FIRST_EXCEPTION'
    #   ALL_COMPLETED = 'ALL_COMPLETED'
    # A timeout can be supplied as well. With ALL_COMPLETED, 'main' is only
    # printed after every submitted task has finished.
    wait(all_tasks, return_when=ALL_COMPLETED)
    print('main')

03、concurrent中 ThreadPoolExecutor源码学习

--分析from concurrent.futures import ThreadPoolExecutor源代码，主要是分析
  from concurrent.futures import Future中Future类的代码

--Future被称为未来对象，是task的返回容器，存放线程的执行状态和返回结果
    --需要关注其如何更新task状态，以及什么时候进行更新
    --对于线程 协程 进程池这些异步编程都或多或少的涉及了Future编程思想，因此这种设计的方式和模式值得关注

--submit()函数解析
    def submit(*args, **kwargs):
        """Queue ``fn(*args, **kwargs)`` for execution and return its Future.

        ``self`` and ``fn`` are unpacked from the positional arguments; ``fn``
        may also be given by keyword, which emits a DeprecationWarning.

        Raises:
            TypeError: if ``self`` or ``fn`` is missing.
            BrokenThreadPool: if the pool is marked as broken.
            RuntimeError: if the pool or the interpreter is shutting down.
        """
        # Everything before the ``with`` block only validates and unpacks the
        # call arguments.
        positional_count = len(args)
        if not positional_count:
            raise TypeError("descriptor 'submit' of 'ThreadPoolExecutor' object "
                            "needs an argument")
        if positional_count >= 2:
            self, fn, *args = args
        elif 'fn' in kwargs:
            fn = kwargs.pop('fn')
            self, *args = args
            import warnings
            warnings.warn("Passing 'fn' as keyword argument is deprecated",
                          DeprecationWarning, stacklevel=2)
        else:
            raise TypeError('submit expected at least 1 positional argument, '
                            'got %d' % (positional_count - 1))

        # The state checks and the enqueue happen under the shutdown lock.
        with self._shutdown_lock:
            if self._broken:
                raise BrokenThreadPool(self._broken)

            if self._shutdown:
                raise RuntimeError('cannot schedule new futures after shutdown')
            if _shutdown:
                raise RuntimeError('cannot schedule new futures after '
                                   'interpreter shutdown')

            # The Future is the handle returned to the caller; the work item
            # bundles it with the callable and its arguments and is what
            # actually gets queued for execution.
            future = _base.Future()
            work_item = _WorkItem(future, fn, args, kwargs)

            self._work_queue.put(work_item)
            # Start another worker thread if one is needed and allowed.
            self._adjust_thread_count()
            return future
	
    # 就是上面的 _adjust_thread_count() 	
    def _adjust_thread_count(self):
        """Start one more worker thread unless one is idle or the pool is full."""
        # An idle worker is available, so no new thread has to be spun up.
        if self._idle_semaphore.acquire(timeout=0):
            return

        # When the executor gets lost, the weakref callback will wake up
        # the worker threads.
        def wake_workers(_, work_queue=self._work_queue):
            work_queue.put(None)

        live_count = len(self._threads)
        if live_count >= self._max_workers:
            # The pool already runs the maximum number of threads.
            return

        # Below the limit: start one daemon worker right away and record it
        # in the self._threads set and in the module-level thread/queue map.
        worker_name = '%s_%d' % (self._thread_name_prefix or self,
                                 live_count)
        worker = threading.Thread(
            name=worker_name,
            target=_worker,
            args=(
                weakref.ref(self, wake_workers),
                self._work_queue,
                self._initializer,
                self._initargs,
            ),
        )
        worker.daemon = True
        worker.start()
        self._threads.add(worker)
        _threads_queues[worker] = self._work_queue

--为了体现出其重要性，这里展示Future类源码如下并简单注释：

class Future(object):
    """Represents the result of an asynchronous computation."""

    def __init__(self):
        """Initializes the future. Should not be called by clients."""
        # Condition guarding every state change; result()/exception() wait on it.
        self._condition = threading.Condition()
        self._state = PENDING
        self._result = None
        self._exception = None
        # Waiter objects that get told about cancellation/result/exception.
        self._waiters = []
        # Callables to run once the future is cancelled or finished.
        self._done_callbacks = []

    def _invoke_callbacks(self):
        """Call every registered done-callback with this future.

        An exception raised by a callback is logged and does not stop the
        remaining callbacks from running.
        """
        for callback in self._done_callbacks:
            try:
                callback(self)
            except Exception:
                LOGGER.exception('exception calling callback for %r', self)

    def __repr__(self):
        """Describe the future's state and, once finished, its outcome type."""
        with self._condition:
            if self._state == FINISHED:
                if self._exception:
                    return '<%s at %#x state=%s raised %s>' % (
                        self.__class__.__name__,
                        id(self),
                        _STATE_TO_DESCRIPTION_MAP[self._state],
                        self._exception.__class__.__name__)
                else:
                    return '<%s at %#x state=%s returned %s>' % (
                        self.__class__.__name__,
                        id(self),
                        _STATE_TO_DESCRIPTION_MAP[self._state],
                        self._result.__class__.__name__)
            return '<%s at %#x state=%s>' % (
                    self.__class__.__name__,
                    id(self),
                   _STATE_TO_DESCRIPTION_MAP[self._state])

    # Cancel the future.
    def cancel(self):
        """Cancel the future if possible.

        Returns True if the future was cancelled, False otherwise. A future
        cannot be cancelled if it is running or has already completed.
        """
        with self._condition:
            # Check the state first: a running or finished future cannot be
            # cancelled any more.
            if self._state in [RUNNING, FINISHED]:
                return False

            # Already cancelled: nothing to do, report success.
            if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
                return True

            self._state = CANCELLED
            self._condition.notify_all()

        # Callbacks run after the condition has been released.
        self._invoke_callbacks()
        return True

    # Check whether the future is in a cancelled state.
    def cancelled(self):
        """Return True if the future was cancelled."""
        with self._condition:
            return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]

    # Check whether the future is currently running.
    def running(self):
        """Return True if the future is currently executing."""
        with self._condition:
            return self._state == RUNNING

    # Check whether the future has reached an end state.
    def done(self):
        """Return True if the future was cancelled or finished executing."""
        with self._condition:
            return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]

    def __get_result(self):
        """Raise the stored exception if there is one, else return the result."""
        if self._exception:
            raise self._exception
        else:
            return self._result

    def add_done_callback(self, fn):
        """Attaches a callable that will be called when the future finishes.

        Args:
            fn: A callable that will be called with this future as its only
                argument when the future completes or is cancelled. The callable
                will always be called by a thread in the same process in which
                it was added. If the future has already completed or been
                cancelled then the callable will be called immediately. These
                callables are called in the order that they were added.
        """
        with self._condition:
            # Not done yet: remember the callback for later.
            if self._state not in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]:
                self._done_callbacks.append(fn)
                return
        # Already done: call it right away, outside the condition, and log
        # (rather than propagate) any exception it raises.
        try:
            fn(self)
        except Exception:
            LOGGER.exception('exception calling callback for %r', self)

    # Blocking call: it waits on self._condition, and like most blocking
    # methods it accepts a timeout.
    def result(self, timeout=None):
        """Return the result of the call that the future represents.

        Args:
            timeout: The number of seconds to wait for the result if the future
                isn't done. If None, then there is no limit on the wait time.

        Returns:
            The result of the call that the future represents.

        Raises:
            CancelledError: If the future was cancelled.
            TimeoutError: If the future didn't finish executing before the given
                timeout.
            Exception: If the call raised then that exception will be raised.
        """
        with self._condition:
            if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
                raise CancelledError()
            elif self._state == FINISHED:
                return self.__get_result()

            # Not done yet: wait once for a notification or the timeout.
            self._condition.wait(timeout)

            # Re-check the state; if it is still not done the wait timed out.
            if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
                raise CancelledError()
            elif self._state == FINISHED:
                return self.__get_result()
            else:
                raise TimeoutError()

    def exception(self, timeout=None):
        """Return the exception raised by the call that the future represents.

        Args:
            timeout: The number of seconds to wait for the exception if the
                future isn't done. If None, then there is no limit on the wait
                time.

        Returns:
            The exception raised by the call that the future represents or None
            if the call completed without raising.

        Raises:
            CancelledError: If the future was cancelled.
            TimeoutError: If the future didn't finish executing before the given
                timeout.
        """

        with self._condition:
            if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
                raise CancelledError()
            elif self._state == FINISHED:
                return self._exception

            # Not done yet: wait once for a notification or the timeout.
            self._condition.wait(timeout)

            # Re-check the state; if it is still not done the wait timed out.
            if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
                raise CancelledError()
            elif self._state == FINISHED:
                return self._exception
            else:
                raise TimeoutError()

    # The following methods should only be used by Executors and in tests.
    def set_running_or_notify_cancel(self):
        """Mark the future as running or process any cancel notifications.

        Should only be used by Executor implementations and unit tests.

        If the future has been cancelled (cancel() was called and returned
        True) then any threads waiting on the future completing (through calls
        to as_completed() or wait()) are notified and False is returned.

        If the future was not cancelled then it is put in the running state
        (future calls to running() will return True) and True is returned.

        This method should be called by Executor implementations before
        executing the work associated with this future. If this method returns
        False then the work should not be executed.

        Returns:
            False if the Future was cancelled, True otherwise.

        Raises:
            RuntimeError: if this method was already called or if set_result()
                or set_exception() was called.
        """
        with self._condition:
            if self._state == CANCELLED:
                self._state = CANCELLED_AND_NOTIFIED
                for waiter in self._waiters:
                    waiter.add_cancelled(self)
                # self._condition.notify_all() is not necessary because
                # self.cancel() triggers a notification.
                return False
            elif self._state == PENDING:
                self._state = RUNNING
                return True
            else:
                LOGGER.critical('Future %s in unexpected state: %s',
                                id(self),
                                self._state)
                raise RuntimeError('Future in unexpected state')

    # notify_all() below wakes every thread that is blocked waiting on this
    # future's condition so that it can continue.
    def set_result(self, result):
        """Sets the return value of work associated with the future.

        Should only be used by Executor implementations and unit tests.
        """
        with self._condition:
            if self._state in {CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED}:
                raise InvalidStateError('{}: {!r}'.format(self._state, self))
            self._result = result
            self._state = FINISHED
            for waiter in self._waiters:
                waiter.add_result(self)
            self._condition.notify_all()
        # Callbacks run after the condition has been released.
        self._invoke_callbacks()

    def set_exception(self, exception):
        """Sets the result of the future as being the given exception.

        Should only be used by Executor implementations and unit tests.
        """
        with self._condition:
            if self._state in {CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED}:
                raise InvalidStateError('{}: {!r}'.format(self._state, self))
            self._exception = exception
            self._state = FINISHED
            for waiter in self._waiters:
                waiter.add_exception(self)
            self._condition.notify_all()
        # Callbacks run after the condition has been released.
        self._invoke_callbacks()