stackstorm 29. 源码分析之----stackstorm的actionrunner服务并发能力分析

目标:
弄清楚st2actionrunner的并发处理能力

1 分析actionrunner逻辑流程
在st2actionrunner服务中两个消费者服务ActionExecutionScheduler和ActionExecutionDispatcher,
其中ActionExecutionScheduler服务既是消费者,也作为生产者发送消息给ActionExecutionDispatcher处理,
ActionExecutionDispatcher接收到消息调用runner去执行action,生成execution,最终完成整个处理过程。
如果采用mistral runner则会建立workflow和execution, st2会关联mistral中的execution到st2本身的execution,
用于追溯execution的执行情况。

2 ActionExecutionScheduler只负责调度并转发消息,真正执行runner的是
ActionExecutionDispatcher,其内部调用RunnerContainer完成执行。
其中python runner在:
st2/contrib/runners/python_runner/python_runner/python_runner.py
PythonRunner类的run方法调用run_command方法
对应于
from st2common.util.green.shell import run_command
进入:
st2/st2common/st2common/util/green/shell.py的
def run_command(cmd, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False,
                cwd=None, env=None, timeout=60, preexec_func=None, kill_func=None,
                read_stdout_func=None, read_stderr_func=None,
                read_stdout_buffer=None, read_stderr_buffer=None):
    """
    Run the provided command in a subprocess and wait until it completes.

    :param cmd: Command to run.
    :type cmd: ``str`` or ``list``

    :param stdin: Process stdin.
    :type stdin: ``object``

    :param stdout: Process stdout.
    :type stdout: ``object``

    :param stderr: Process stderr.
    :type stderr: ``object``

    :param shell: True to use a shell.
    :type shell: ``boolean``

    :param cwd: Optional working directory.
    :type cwd: ``str``

    :param env: Optional environment to use with the command. If not provided,
                environment from the current process is inherited.
    :type env: ``dict``

    :param timeout: How long to wait before timing out.
    :type timeout: ``float``

    :param preexec_func: Optional pre-exec function.
    :type preexec_func: ``callable``

    :param kill_func: Optional function which will be called on timeout to kill the process.
                      If not provided, it defaults to `process.kill`
    :type kill_func: ``callable``

    :param read_stdout_func: Function which is responsible for reading process stdout when
                             using live read mode.
    :type read_stdout_func: ``func``

    :param read_stderr_func: Function which is responsible for reading process stderr when
                             using live read mode.
    :type read_stderr_func: ``func``

    :param read_stdout_buffer: Buffer object the read_stdout_func writes stdout into
                               (live read mode).
    :type read_stdout_buffer: ``object``

    :param read_stderr_buffer: Buffer object the read_stderr_func writes stderr into
                               (live read mode).
    :type read_stderr_buffer: ``object``

    :rtype: ``tuple`` (exit_code, stdout, stderr, timed_out)
    """
    LOG.debug('Entering st2common.util.green.run_command.')

    assert isinstance(cmd, (list, tuple) + six.string_types)

    # Live read mode requires both reader functions - one without the other is a caller bug.
    if (read_stdout_func and not read_stderr_func) or (read_stderr_func and not read_stdout_func):
        raise ValueError('Both read_stdout_func and read_stderr_func arguments need '
                         'to be provided.')

    # NOTE(review): this accepts a single buffer because of the "or" - arguably both
    # buffers should be required ("and") since both are read below; confirm before changing.
    if read_stdout_func and not (read_stdout_buffer or read_stderr_buffer):
        raise ValueError('read_stdout_buffer and read_stderr_buffer arguments need to be provided '
                         'when read_stdout_func is provided')

    if not env:
        LOG.debug('env argument not provided. using process env (os.environ).')
        env = os.environ.copy()

    # Note: We are using eventlet friendly implementation of subprocess
    # which uses GreenPipe so it doesn't block
    LOG.debug('Creating subprocess.')
    process = subprocess.Popen(args=cmd, stdin=stdin, stdout=stdout, stderr=stderr,
                               env=env, cwd=cwd, shell=shell, preexec_fn=preexec_func)

    if read_stdout_func:
        LOG.debug('Spawning read_stdout_func function')
        read_stdout_thread = eventlet.spawn(read_stdout_func, process.stdout, read_stdout_buffer)

    if read_stderr_func:
        LOG.debug('Spawning read_stderr_func function')
        read_stderr_thread = eventlet.spawn(read_stderr_func, process.stderr, read_stderr_buffer)

    def on_timeout_expired(timeout):
        # Green thread which waits up to ``timeout`` seconds for the process to exit
        # and kills it (and the reader threads) if the wait times out.
        # Note: the vestigial "global timed_out" declaration has been removed - this
        # function never assigns timed_out; it is computed as a local below from the
        # sentinel returncode.
        try:
            LOG.debug('Starting process wait inside timeout handler.')
            process.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            # Command has timed out, kill the process and propagate the error.
            # Note: We explicitly set the returncode to indicate the timeout.
            LOG.debug('Command execution timeout reached.')
            process.returncode = TIMEOUT_EXIT_CODE

            if kill_func:
                LOG.debug('Calling kill_func.')
                kill_func(process=process)
            else:
                LOG.debug('Killing process.')
                process.kill()

            if read_stdout_func and read_stderr_func:
                LOG.debug('Killing read_stdout_thread and read_stderr_thread')
                read_stdout_thread.kill()
                read_stderr_thread.kill()

    LOG.debug('Spawning timeout handler thread.')
    timeout_thread = eventlet.spawn(on_timeout_expired, timeout)
    LOG.debug('Attaching to process.')

    if read_stdout_func and read_stderr_func:
        LOG.debug('Using real-time stdout and stderr read mode, calling process.wait()')
        process.wait()
    else:
        LOG.debug('Using delayed stdout and stderr read mode, calling process.communicate()')
        stdout, stderr = process.communicate()

    # Process has exited (normally or via kill) - the timeout watchdog is no longer needed.
    timeout_thread.cancel()
    exit_code = process.returncode

    if read_stdout_func and read_stderr_func:
        # Wait on those green threads to finish reading from stdout and stderr before continuing
        read_stdout_thread.wait()
        read_stderr_thread.wait()

        stdout = read_stdout_buffer.getvalue()
        stderr = read_stderr_buffer.getvalue()

    # The sentinel returncode set inside on_timeout_expired tells us whether we timed out.
    if exit_code == TIMEOUT_EXIT_CODE:
        LOG.debug('Timeout.')
        timed_out = True
    else:
        LOG.debug('No timeout.')
        timed_out = False

    LOG.debug('Returning.')
    return (exit_code, stdout, stderr, timed_out)

分析:
最关键的部分
    process = subprocess.Popen(args=cmd, stdin=stdin, stdout=stdout, stderr=stderr,
                               env=env, cwd=cwd, shell=shell, preexec_fn=preexec_func)

    if read_stdout_func:
        LOG.debug('Spawning read_stdout_func function')
        read_stdout_thread = eventlet.spawn(read_stdout_func, process.stdout, read_stdout_buffer)

    if read_stderr_func:
        LOG.debug('Spawning read_stderr_func function')
        read_stderr_thread = eventlet.spawn(read_stderr_func, process.stderr, read_stderr_buffer)

这里用了eventlet.green的subprocess,具体如下:
from eventlet.green import subprocess
用eventlet.spawn开启协程读取子进程的stdout/stderr,子进程一旦执行完成,就将结果写入到actionexecution的结果中。
用协程判断是否超时
最终返回结果:
return (exit_code, stdout, stderr, timed_out)

因为没有特殊设置eventlet的相关参数
eventlet.green.subprocess
参考文档:
https://kite.com/python/docs/eventlet.green.subprocess

中途还调用了:
st2/st2common/st2common/services/action.py

def store_execution_output_data(execution_db, action_db, data, output_type='output',
                                timestamp=None):
    """
    Persist a chunk of output produced by an execution as a new document in the
    output collection and publish it.

    :param execution_db: Execution the output belongs to.
    :param action_db: Action which produced the output.
    :param data: Raw output data to store.
    :param output_type: Type of the output (e.g. "output", "stdout", "stderr").
    :param timestamp: Optional timestamp; defaults to the current UTC time.
    """
    # Runner name is looked up defensively since runner_type may be absent.
    runner_ref = getattr(action_db, 'runner_type', {}).get('name', 'unknown')

    output_db = ActionExecutionOutputDB(
        execution_id=str(execution_db.id),
        action_ref=action_db.ref,
        runner_ref=runner_ref,
        timestamp=timestamp or date_utils.get_datetime_utc_now(),
        output_type=output_type,
        data=data)

    # Persist and publish the document, but don't dispatch a trigger for it.
    return ActionExecutionOutput.add_or_update(output_db, publish=True,
                                               dispatch_trigger=False)

参考:
https://blog.csdn.net/qingyuanluofeng/java/article/details/105398730

总结:
1)调度action执行的服务是消息消费者,对应3副本,在该消费者中针对定时任务调用
python runner去执行定时任务的python脚本,执行定时任务的python脚本采用了
协程去操作,具体就是:
from eventlet.green import subprocess
process = subprocess.Popen(args=cmd, stdin=stdin, stdout=stdout, stderr=stderr,
                               env=env, cwd=cwd, shell=shell, preexec_fn=preexec_func)
read_stdout_thread = eventlet.spawn(read_stdout_func, process.stdout, read_stdout_buffer)
read_stderr_thread = eventlet.spawn(read_stderr_func, process.stderr, read_stderr_buffer)

3 看消费者本身
3.1 分析ActionExecutionDispatcher
对应代码st2/st2actions/st2actions/worker.py
class ActionExecutionDispatcher(MessageHandler):

    message_type = LiveActionDB

    def __init__(self, connection, queues):
        """
        :param connection: Message broker connection the consumer attaches to.
        :param queues: Queues to consume LiveAction messages from.
        """
        super(ActionExecutionDispatcher, self).__init__(connection, queues)
        # Container which resolves and invokes the runner for each action.
        self.container = RunnerContainer()
        # Live actions currently being processed -- presumably used by the elided
        # process()/shutdown logic; verify against the full worker.py source.
        self._running_liveactions = set()

    def get_queue_consumer(self, connection, queues):
        """Return the queue consumer used by this message handler."""
        # We want to use a special ActionsQueueConsumer which uses 2 dispatcher pools
        # (one for workflow actions, one for regular actions) so workflow actions
        # never block non-workflow actions.
        return ActionsQueueConsumer(connection=connection, queues=queues, handler=self)

    def process(self, liveaction):
      ......

分析:
关键就是消费者ActionsQueueConsumer(connection=connection, queues=queues, handler=self)
看其是否支持并发处理

3.2 分析ActionsQueueConsumer
st2/st2common/st2common/transport/consumers.py
代码如下:
class ActionsQueueConsumer(QueueConsumer):
    """
    Special Queue Consumer for action runner which uses multiple BufferedDispatcher pools:

    1. One for regular (non-workflow) actions
    2. One for workflow actions

    This way we can ensure workflow actions never block non-workflow actions.
    """

    def __init__(self, connection, queues, handler):
        self.connection = connection

        self._queues = queues
        self._handler = handler

        # Pool sizes are operator-configurable ([actionrunner] workflows_pool_size /
        # actions_pool_size) so per-process concurrency can be tuned.
        workflows_pool_size = cfg.CONF.actionrunner.workflows_pool_size
        actions_pool_size = cfg.CONF.actionrunner.actions_pool_size
        self._workflows_dispatcher = BufferedDispatcher(dispatch_pool_size=workflows_pool_size,
                                                        name='workflows-dispatcher')
        self._actions_dispatcher = BufferedDispatcher(dispatch_pool_size=actions_pool_size,
                                                      name='actions-dispatcher')

    def process(self, body, message):
        """
        Route an incoming message to the appropriate dispatcher pool and always ack it.
        """
        try:
            if not isinstance(body, self._handler.message_type):
                raise TypeError('Received an unexpected type "%s" for payload.' % type(body))

            action_is_workflow = getattr(body, 'action_is_workflow', False)
            if action_is_workflow:
                # Use workflow dispatcher queue
                dispatcher = self._workflows_dispatcher
            else:
                # Use dispatcher queue for regular (non-workflow) actions
                dispatcher = self._actions_dispatcher

            LOG.debug('Using BufferedDispatcher pool: "%s"', str(dispatcher))
            dispatcher.dispatch(self._process_message, body)
        except Exception:
            # "except Exception" instead of a bare "except" so BaseException subclasses
            # such as SystemExit and KeyboardInterrupt are not silently swallowed here.
            LOG.exception('%s failed to process message: %s', self.__class__.__name__, body)
        finally:
            # At this point we will always ack a message.
            message.ack()

    def shutdown(self):
        """Shut down both dispatcher pools."""
        self._workflows_dispatcher.shutdown()
        self._actions_dispatcher.shutdown()

分析:
self._actions_dispatcher = BufferedDispatcher(dispatch_pool_size=actions_pool_size, name='actions-dispatcher')
协程池的大小为:
workflows_pool_size = cfg.CONF.actionrunner.workflows_pool_size
actions_pool_size = cfg.CONF.actionrunner.actions_pool_size

找到:
st2/st2common/st2common/config.py
    dispatcher_pool_opts = [
        cfg.IntOpt('workflows_pool_size', default=40,
                   help='Internal pool size for dispatcher used by workflow actions.'),
        cfg.IntOpt('actions_pool_size', default=60,
                   help='Internal pool size for dispatcher used by regular actions.')
    ]
    do_register_opts(dispatcher_pool_opts, group='actionrunner')

所以workflow的协程池大小默认为40,action的协程池大小默认为60
查看/etc/st2/st2.conf文件内容:
[actionrunner]
logging = /etc/st2/logging.actionrunner.conf
[api]
# allow_origin is required for handling CORS in st2 web UI.
allow_origin = *
# Host and port to bind the API server.
host = 127.0.0.1
logging = /etc/st2/logging.api.conf
mask_secrets = true
port = 9101
[auth]
# Base URL to the API endpoint excluding the version (e.g. http://myhost.net:9101/)
api_url = http://st2api:9101
mode = standalone
# Note: Settings below are only used in "standalone" mode
# backend: flat_file
# backend_kwargs: '{"file_path": "/etc/st2/htpasswd"}'
backend = keystone
backend_kwargs = {"keystone_url": "http://keystone-api.openstack.svc.cluster.local:80", "keystone_version": 3, "keystone_mode": "email"}
debug = false
enable = true
host = 127.0.0.1
logging = /etc/st2/logging.auth.conf
port = 9100
use_ssl = false
[content]
packs_base_paths = /opt/stackstorm/packs.dev
[coordination]
url = redis://password@redis.openstack.svc.cluster.local:6379
[database]
host = mongodb.openstack.svc.cluster.local
password = dozer
port = 27017
username = dozer
[exporter]
logging = /etc/st2/logging.exporter.conf
[garbagecollector]
action_executions_output_ttl = 14
action_executions_ttl = 14
logging = /etc/st2/logging.garbagecollector.conf
purge_inquiries = true
trigger_instances_ttl = 14
[keyvalue]
encryption_key_path = /etc/st2/keys/datastore_key.json
[log]
excludes = requests,paramiko
mask_secrets = true
redirect_stderr = false
[messaging]
url = amqp://rabbitmq:JM0W8hWm@rabbitmq.openstack.svc.cluster.local:5672
[mistral]
api_url = http://st2api:9101
v2_base_url = http://mistral-api:8989/v2
[notifier]
logging = /etc/st2/logging.notifier.conf
[rbac]
enable = false
permission_isolation = true
sync_remote_groups = true
[resultstracker]
logging = /etc/st2/logging.resultstracker.conf
[rulesengine]
logging = /etc/st2/logging.rulesengine.conf
[sensorcontainer]
logging = /etc/st2/logging.sensorcontainer.conf
[ssh_runner]
remote_dir = /tmp
[stream]
logging = /etc/st2/logging.stream.conf
[syslog]
facility = local7
host = 127.0.0.1
port = 514
protocol = udp
[system]
base_path = /opt/stackstorm
[system_user]
ssh_key_file = /home/adminATexample.org/.ssh/admin_rsa
user = admin@example.org

关键就是:
self._actions_dispatcher = BufferedDispatcher(dispatch_pool_size=actions_pool_size, name='actions-dispatcher')


3.3 分析BufferedDispatcher
调用:
st2/st2common/st2common/util/greenpooldispatch.py的

class BufferedDispatcher(object):
    """
    Dispatcher which buffers work items in an internal queue and runs them on a
    fixed-size eventlet green-thread pool.

    A background monitor green thread keeps draining the buffer so items enqueued
    while the pool was saturated are eventually dispatched.
    """

    def __init__(self, dispatch_pool_size=50, monitor_thread_empty_q_sleep_time=5,
                 monitor_thread_no_workers_sleep_time=1, name=None):
        # Maximum number of handlers running concurrently in the green pool.
        self._pool_limit = dispatch_pool_size
        self._dispatcher_pool = eventlet.GreenPool(dispatch_pool_size)
        # Background green thread which loops forever flushing the work buffer.
        self._dispatch_monitor_thread = eventlet.greenthread.spawn(self._flush)
        # Sleep intervals (seconds) for the monitor loop: queue empty / no free workers.
        self._monitor_thread_empty_q_sleep_time = monitor_thread_empty_q_sleep_time
        self._monitor_thread_no_workers_sleep_time = monitor_thread_no_workers_sleep_time
        self._name = name

        # Unbounded FIFO of (handler, args) tuples awaiting a free pool worker.
        self._work_buffer = Queue.Queue()

        # Internal attributes we use to track how long the pool is busy without any free workers
        self._pool_last_free_ts = time.time()

    @property
    def name(self):
        # Fall back to the object id when no explicit name was provided.
        return self._name or id(self)

    def dispatch(self, handler, *args):
        """Enqueue handler(*args) and immediately attempt to dispatch it."""
        self._work_buffer.put((handler, args), block=True, timeout=1)
        self._flush_now()

    def shutdown(self):
        # Kill only the monitor thread; handlers already spawned keep running.
        self._dispatch_monitor_thread.kill()

    def _flush(self):
        # Monitor loop: wait until there is work AND a free worker, then flush.
        while True:
            while self._work_buffer.empty():
                eventlet.greenthread.sleep(self._monitor_thread_empty_q_sleep_time)
            while self._dispatcher_pool.free() <= 0:
                eventlet.greenthread.sleep(self._monitor_thread_no_workers_sleep_time)
            self._flush_now()

    def _flush_now(self):
        # Pool saturated: optionally log how long we've been without free workers, then bail.
        if self._dispatcher_pool.free() <= 0:
            now = time.time()

            if (now - self._pool_last_free_ts) >= POOL_BUSY_THRESHOLD_SECONDS:
                LOG.info(POOL_BUSY_LOG_MESSAGE % (self.name, POOL_BUSY_THRESHOLD_SECONDS))

            return

        # Update the time of when there were free threads available
        self._pool_last_free_ts = time.time()

        # Drain the buffer into the pool while both work and free workers remain.
        while not self._work_buffer.empty() and self._dispatcher_pool.free() > 0:
            (handler, args) = self._work_buffer.get_nowait()
            self._dispatcher_pool.spawn(handler, *args)

分析:
1)self._dispatcher_pool = eventlet.GreenPool(dispatch_pool_size)

2)def dispatch(self, handler, *args):
向任务队列中压入待执行的方法及其参数,然后立即尝试触发一次_flush_now进行分发。

3)_flush_now(self):
只要任务队列非空且协程池中还有空闲协程,就从任务队列中取出待处理任务及其参数,交给协程池处理。

4)最关键的部分就是在协程池中处理消息:
self._dispatcher_pool.spawn(handler, *args)


4 总结
1) st2actionrunner中workflow的协程池大小默认为40,action的协程池大小默认为60。
如果部署3个actionrunner服务实例,则总共支持120个workflow并发处理,180个action并发处理。
2) st2actionrunner中主入口类ActionExecutionDispatcher在其消费者
ActionsQueueConsumer中预先实例化了两个BufferedDispatcher,一个用于处理workflow作为runner的BufferedDispatcher,
一个用于处理python作为runner的BufferedDispatcher。根据待处理消息判断其是哪一种放入对应BufferedDispatcher。
BufferedDispatcher其内部是一个队列+协程池。
将待处理消息和处理方法放入BufferedDispatcher的队列中,然后从队列中获取待处理消息和处理方法,然后使用协程池去处理消息。
所以st2actionrunner本质上是并发的。


参考:
stackstorm 2.6代码

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值