airflow源码精读 六

cli解析参数

airflow.py:
  
if __name__ == '__main__':

    # When core.security is set to 'kerberos', publish the configured
    # ccache and keytab values through the process environment.
    security_mode = configuration.get("core", "security")
    if security_mode == 'kerberos':
        for env_key, option in (('KRB5CCNAME', 'ccache'),
                                ('KRB5_KTNAME', 'keytab')):
            os.environ[env_key] = configuration.get('kerberos', option)

    # Obtain the command-line parser from the CLI factory.
    parser = CLIFactory.get_parser()
    # Parse the command line ...
    args = parser.parse_args()
    # ... and invoke the function carried by the parsed arguments.
    args.func(args)

接受的cli命令先在args中根据给定命令匹配到指定的Arg(tuple数组),再以Arg名称去subparsers中匹配到相应的执行函数func。

scheduler()

#cli.py 调度器的执行入口


def scheduler(args):
    """
    Entry point of the scheduler command: build a ``SchedulerJob`` from the
    parsed arguments and run it, either in the foreground or as a daemon.

    :param args: parsed command-line arguments. The attributes read here are
        ``dag_id``, ``subdir``, ``run_duration``, ``num_runs``, ``do_pickle``,
        ``daemon``, ``pid``, ``stdout``, ``stderr`` and ``log_file``.
    """
    print(settings.HEADER)
    # Build the SchedulerJob instance.
    job = jobs.SchedulerJob(
        dag_id=args.dag_id,
        subdir=process_subdir(args.subdir),
        run_duration=args.run_duration,
        num_runs=args.num_runs,
        do_pickle=args.do_pickle)

    if not args.daemon:
        # Foreground mode: install the signal handlers, then run directly.
        signal.signal(signal.SIGINT, sigint_handler)
        signal.signal(signal.SIGTERM, sigint_handler)
        signal.signal(signal.SIGQUIT, sigquit_handler)
        job.run()
        return

    # Daemon mode: resolve the pid / stdout / stderr / log locations.
    pid, stdout, stderr, log_file = setup_locations(
        "scheduler", args.pid, args.stdout, args.stderr, args.log_file)

    handle = setup_logging(log_file)

    # The ``with`` block closes both files even when opening the second one,
    # building the context or ``job.run()`` raises, so no descriptor is
    # leaked on the error path.
    with open(stdout, 'w+') as stdout_file, open(stderr, 'w+') as stderr_file:
        # DaemonContext is the context manager used to become a daemon.
        ctx = daemon.DaemonContext(
            pidfile=TimeoutPIDLockFile(pid, -1),  # records the daemon pid
            files_preserve=[handle],  # files to keep open in daemon mode
            stdout=stdout_file,
            stderr=stderr_file,
        )
        with ctx:
            # Runs in daemon mode.
            job.run()

Dag

BaseDag和BaseDagBag

# Directed acyclic graph (DAG) base interface
class BaseDag(object):
    """
    Abstract base class.

    Base DAG object that both the SimpleDag and DAG inherit. Every member
    below is a stub that raises ``NotImplementedError``.
    """
    __metaclass__ = ABCMeta

    @abstractproperty
    def dag_id(self):
        """
        Property stub for the DAG id; raises until overridden.
        """
        raise NotImplementedError()

    @abstractproperty
    def task_ids(self):
        """
        IDs of the tasks contained in this DAG.
        """
        raise NotImplementedError()

    @abstractproperty
    def full_filepath(self):
        """
        File path of this DAG.
        """
        raise NotImplementedError()

    @abstractmethod
    def concurrency(self):
        """
        Degree of parallelism.

        :return: maximum number of tasks that can run simultaneously from
            this DAG
        :rtype: int
        """
        raise NotImplementedError()

    @abstractmethod
    def is_paused(self):
        """
        Whether this DAG is paused.
        """
        raise NotImplementedError()

    @abstractmethod
    def pickle_id(self):
        """
        Serialization (pickle) ID of this DAG.
        """
        raise NotImplementedError()

# BaseDagBag represents a collection of DAGs
class BaseDagBag(object):
    """
    Collection of DAGs.

    Base object that both the SimpleDagBag and DagBag inherit. Both members
    are stubs that raise ``NotImplementedError``.
    """
    @abstractproperty
    def dag_ids(self):
        """
        IDs of the DAGs held by this bag.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_dag(self, dag_id):
        """
        Look up a DAG by its id.

        :param dag_id: id of the DAG to fetch
        """
        raise NotImplementedError()

Dag 见代码注释

Operator算子

BaseOperator

算子本身可以是一个dag图,运行时会占用一定资源,这些资源以虚拟的pool_slots和物理的resources来表示

  • pool_slots含有若干个slot,当并行计算的任务占用全部资源时,则报错

  • resources表示对于实际资源的需求,比如cpu和内存

  • 超过sla规定的时间后算子仍然没有完成,则报警

  • 触发规则

  • trigger_rule【all_success | all_failed | all_done | one_success】表示对于上游算子的依赖;

  • depends_on_past 表示是否依赖于上次算子运行是否成功

  • wait_for_downstream表示当且仅当上次dagrun的下游算子完成之后,才能触发该次算子运行

  • 算子与算子之间,不能直接通信

  • 只能通过 xcom 或第三方间接通信。

  • 算子的具体逻辑实现在execute()中

BashOperator

class BashOperator(BaseOperator):
    """
    Operator that runs a bash command in a subprocess.

    The command text is written to a temporary script file inside a
    temporary directory, executed with ``bash``, and its combined
    stdout/stderr is forwarded line by line to the task log.

    :param bash_command: text written to the temporary script and run
        with ``bash``.
    :param xcom_push: when true, ``execute`` returns the last line of the
        subprocess output (presumably so the caller can push it to XCom --
        not visible here).
    :param env: passed unchanged as ``env`` to ``Popen``.
    :param output_encoding: encoding used to decode the subprocess output.
    """
    template_fields = ('bash_command', 'env')
    template_ext = ('.sh', '.bash',)
    ui_color = '#f0ede4'

    @apply_defaults
    def __init__(
            self,
            bash_command,
            xcom_push=False,
            env=None,
            output_encoding='utf-8',
            *args, **kwargs):

        super(BashOperator, self).__init__(*args, **kwargs)
        self.bash_command = bash_command
        self.env = env
        self.xcom_push_flag = xcom_push
        self.output_encoding = output_encoding
        # Handle of the running subprocess; stays None until execute()
        # launches it, which lets on_kill() tell "nothing to kill" apart.
        self.sp = None

    def execute(self, context):
        """
        Execute the bash command in a temporary directory
        which will be cleaned afterwards

        :param context: not used by this method.
        :return: the last output line when ``xcom_push`` was requested
            (an empty string if the command printed nothing), else None.
        :raises AirflowException: when the command exits with a non-zero
            return code.
        """
        bash_command = self.bash_command
        self.log.info("Tmp dir root location: \n %s", gettempdir())
        with TemporaryDirectory(prefix='airflowtmp') as tmp_dir:
            with NamedTemporaryFile(dir=tmp_dir, prefix=self.task_id) as f:
                # Write the command into a script file located in the
                # temporary directory.
                f.write(bytes(bash_command, 'utf_8'))
                f.flush()
                fname = f.name
                # f.name already contains the directory it was created in,
                # so prepending tmp_dir again would log a wrong path.
                script_location = os.path.abspath(fname)
                self.log.info(
                    "Temporary script location: %s",
                    script_location
                )

                def pre_exec():
                    # Restore default signal disposition and invoke setsid
                    for sig in ('SIGPIPE', 'SIGXFSZ'):
                        if hasattr(signal, sig):
                            signal.signal(getattr(signal, sig), signal.SIG_DFL)
                    # New session => new process group, which is what
                    # on_kill() signals via os.killpg.
                    os.setsid()

                self.log.info("Running command: %s", bash_command)
                # Run the script with bash.
                sp = Popen(
                    ['bash', fname],
                    stdout=PIPE, stderr=STDOUT,
                    cwd=tmp_dir, env=self.env,
                    preexec_fn=pre_exec)

                self.sp = sp

                self.log.info("Output:")
                # Pre-set so the return value is defined even when the
                # command produces no output at all.
                line = ''
                for line in iter(sp.stdout.readline, b''):
                    line = line.decode(self.output_encoding).strip()
                    self.log.info(line)
                sp.wait()
                self.log.info(
                    "Command exited with return code %s",
                    sp.returncode
                )

                if sp.returncode:
                    raise AirflowException("Bash command failed")

        if self.xcom_push_flag:
            return line

    def on_kill(self):
        """
        Send SIGTERM to the process group of the running bash subprocess.

        Does nothing when no subprocess has been started yet.
        """
        sp = getattr(self, 'sp', None)
        if sp is None:
            # execute() never launched a subprocess, so there is no
            # process group to signal.
            return
        self.log.info('Sending SIGTERM signal to bash process group')
        # Deliver the signal to the whole process group.
        os.killpg(os.getpgid(sp.pid), signal.SIGTERM)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值