1.概要
在 Airflow 流程中，有时需要按条件中断（跳过）后续任务，这里借助分支（BranchPythonOperator）来实现。
2.示例
以 zk 和 kafka 的部署为例（下图为只部署 kafka 的情况）。共有 3 种情况：1-只部署 zk | 2-只部署 kafka | 3-同时部署。
3. BranchPythonOperator
# !/usr/bin/python
# -*- coding: utf-8 -*-
import json
from airflow import DAG
from airflow.exceptions import AirflowSkipException, AirflowException
from airflow.operators.dummy_operator import DummyOperator
from airflow.utils.dates import days_ago
from airflow.operators.python_operator import PythonOperator, BranchPythonOperator
from datetime import timedelta
from airflow.utils.trigger_rule import TriggerRule
# Arguments handed to the DAG as `default_args`.
default_args = {
    'owner': 'admin',
    'start_date': days_ago(1),
    # Retry settings: three retries, ten seconds apart.
    'retries': 3,
    'retry_delay': timedelta(seconds=10),
}

# No schedule interval is configured; the callables in this file read their
# input from the triggering run's `conf`.
dag = DAG(
    dag_id='zk_kafka',
    schedule_interval=None,
    default_args=default_args,
)
def printParams(*args, **kwargs):
    """Print the positional arguments, the ``params`` entry and the run conf.

    Debug helper intended to be called from task callables with the
    arguments they received.

    Args:
        *args: Positional arguments to echo; printed as a tuple.
        **kwargs: Keyword arguments. Only ``params`` and ``dag_run`` are
            read; ``dag_run`` is expected to expose a ``conf`` attribute.

    Returns:
        None. The three values are written to stdout, one per line.
    """
    print(args)
    print(kwargs.get('params'))
    # `dag_run` may be absent (e.g. when called outside a task context);
    # print None in that case instead of failing on `None.conf`.
    dag_run = kwargs.get('dag_run')
    print(dag_run.conf if dag_run is not None else None)
class AirflowTermination(AirflowException):
    """Project-specific exception derived from ``AirflowException``.

    NOTE(review): nothing in the visible part of this file raises it;
    presumably meant to signal a deliberate termination of the flow --
    confirm against callers.
    """

    # Overrides the class-level status code with a custom value.
    status_code = 1024
# ------common-----------
def checkMiddware(type, **kwargs):
    """Check a middleware component before it is deployed (stub).

    The body is currently a no-op placeholder.

    Args:
        type: Name of the component to check; the operators in this file
            pass ``'zookeeper'`` or ``'kafka'`` positionally via ``op_args``.
            The name shadows the builtin ``type`` but is kept so existing
            callers keep working.
        **kwargs: Extra keyword arguments supplied by the caller; unused.

    Returns:
        None.
    """
    pass
# -----------zookeeper------------------
def deployZookeeper(*args, **kwargs):
    """Deploy Zookeeper (stub): log the inputs and read the run conf.

    Args:
        *args: Positional arguments, forwarded to ``printParams``.
        **kwargs: Keyword arguments, forwarded to ``printParams``;
            ``dag_run`` must be present because its ``conf`` is read here.

    Returns:
        None.
    """
    printParams(*args, **kwargs)
    # Configuration of the triggering run. It is not used yet because the
    # deployment step itself has not been implemented.
    params = kwargs.get('dag_run').conf
# Zookeeper check task: calls checkMiddware with 'zookeeper' as its
# positional argument.
checkZk = PythonOperator(
    dag=dag,
    task_id="CheckZookeeper",
    python_callable=checkMiddware,
    op_args=['zookeeper'],
    provide_context=True,
)

# Zookeeper deploy task: calls deployZookeeper under the ALL_SUCCESS
# trigger rule.
deployZk = PythonOperator(
    dag=dag,
    task_id="DeployZookeeper",
    python_callable=deployZookeeper,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_SUCCESS,
)
def deployKafka(*args, **kwargs):
    """Deploy Kafka (stub): log the inputs and read the run conf.

    Args:
        *args: Positional arguments, forwarded to ``printParams``.
        **kwargs: Keyword arguments, forwarded to ``printParams``;
            ``dag_run`` must be present because its ``conf`` is read here.

    Returns:
        None.
    """
    printParams(*args, **kwargs)
    # Configuration of the triggering run. It is not used yet because the
    # deployment step itself has not been implemented.
    params = kwargs.get('dag_run').conf
# Kafka check task: calls checkMiddware with 'kafka' as its positional
# argument.
checkKfk = PythonOperator(
    dag=dag,
    task_id="CheckKafka",
    python_callable=checkMiddware,
    op_args=['kafka'],
    provide_context=True,
)

# Kafka deploy task: calls deployKafka under the ALL_SUCCESS trigger rule.
deployKfk = PythonOperator(
    dag=dag,
    task_id="DeployKafka",
    python_callable=deployKafka,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_SUCCESS,
)
def shoudDeploy(type, **kwargs):
    """Pick the task id a branch operator should follow for a component.

    A component counts as requested when the run conf contains its name
    with a value other than ``None``.

    Args:
        type: Component name, ``'zookeeper'`` or ``'kafka'``. The name
            shadows the builtin ``type`` but is kept so existing callers
            keep working.
        **kwargs: Keyword arguments supplied by the caller; only
            ``dag_run`` is read, and its ``conf`` attribute is used.

    Returns:
        ``'CheckZookeeper'``/``'SkipZookeeper'`` for ``'zookeeper'``,
        ``'CheckKafka'``/``'SkipKafka'`` for ``'kafka'``, and ``'end'`` for
        any other component name.
    """
    dag_run = kwargs.get('dag_run')
    # A run triggered without a payload has conf=None (and dag_run itself
    # may be missing); treat both as "nothing requested" instead of raising.
    params = getattr(dag_run, 'conf', None) or {}
    contain = type in params and params[type] is not None

    if "zookeeper" == type:
        return 'CheckZookeeper' if contain else "SkipZookeeper"
    if "kafka" == type:
        return 'CheckKafka' if contain else "SkipKafka"
    # NOTE(review): 'end' is not a direct downstream of the branch tasks in
    # this file; this fallback is only hit for an unknown component name.
    return 'end'
# Branch task for Zookeeper: shoudDeploy('zookeeper') returns the id of the
# task to follow.
startZookeeper = BranchPythonOperator(
    dag=dag,
    task_id='StartZookeeper',
    python_callable=shoudDeploy,
    op_args=['zookeeper'],
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
)

# Branch task for Kafka: shoudDeploy('kafka') returns the id of the task to
# follow.
startKafka = BranchPythonOperator(
    dag=dag,
    task_id='StartKafka',
    python_callable=shoudDeploy,
    op_args=['kafka'],
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
)
# Entry marker of the flow.
start = DummyOperator(
    dag=dag,
    task_id='start',
    trigger_rule=TriggerRule.ALL_DONE,
)

# Exit marker of the flow.
# NOTE(review): ALL_DONE is presumably chosen so 'end' still runs when one
# of its upstream paths was skipped -- confirm.
end = DummyOperator(
    dag=dag,
    task_id='end',
    trigger_rule=TriggerRule.ALL_DONE,
)
# Placeholder tasks; their ids are the "skip" targets returned by shoudDeploy.
skip1 = DummyOperator(dag=dag, task_id='SkipZookeeper')
skip2 = DummyOperator(dag=dag, task_id='SkipKafka')

# Zookeeper path:
#   start -> StartZookeeper -> SkipZookeeper -> end
#   start -> StartZookeeper -> CheckZookeeper -> DeployZookeeper -> end
start >> startZookeeper
startZookeeper >> skip1 >> end
startZookeeper >> checkZk >> deployZk >> end

# Kafka path:
#   start -> StartKafka -> SkipKafka -> end
#   start -> StartKafka -> CheckKafka -> DeployKafka -> end
start >> startKafka
startKafka >> skip2 >> end
startKafka >> checkKfk >> deployKfk >> end