Apache Airflow --- Dynamic DAGs

1. DAG

1.1 Tutorial

# [START tutorial]
# [START import_module]
from datetime import datetime, timedelta
from textwrap import dedent

# The DAG object; we'll need this to instantiate a DAG
from airflow import DAG

# Operators; we need this to operate!
from airflow.operators.bash import BashOperator

# [END import_module]


# [START instantiate_dag]
with DAG(
    'tutorial',
    # [START default_args]
    # These args will get passed on to each operator
    # You can override them on a per-task basis during operator initialization
    default_args={
        'depends_on_past': False,
        'email': ['airflow@example.com'],
        'email_on_failure': False,
        'email_on_retry': False,
        'retries': 1,
        'retry_delay': timedelta(minutes=5),
        # 'queue': 'bash_queue',
        # 'pool': 'backfill',
        # 'priority_weight': 10,
        # 'end_date': datetime(2016, 1, 1),
        # 'wait_for_downstream': False,
        # 'sla': timedelta(hours=2),
        # 'execution_timeout': timedelta(seconds=300),
        # 'on_failure_callback': some_function,
        # 'on_success_callback': some_other_function,
        # 'on_retry_callback': another_function,
        # 'sla_miss_callback': yet_another_function,
        # 'trigger_rule': 'all_success'
    },
    # [END default_args]
    description='A simple tutorial DAG',
    schedule_interval=timedelta(days=1),
    start_date=datetime(2021, 1, 1),
    catchup=False,
    tags=['example'],
) as dag:
    # [END instantiate_dag]

    # t1, t2 and t3 are examples of tasks created by instantiating operators
    # [START basic_task]
    t1 = BashOperator(
        task_id='print_date',
        bash_command='date',
    )

    t2 = BashOperator(
        task_id='sleep',
        depends_on_past=False,
        bash_command='sleep 5',
        retries=3,
    )
    # [END basic_task]

    # [START documentation]
    t1.doc_md = dedent(
        """\
    #### Task Documentation
    You can document your task using the attributes `doc_md` (markdown),
    `doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets
    rendered in the UI's Task Instance Details page.
    ![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png)

    """
    )

    dag.doc_md = __doc__  # providing that you have a docstring at the beginning of the DAG
    dag.doc_md = """
    This is a documentation placed anywhere
    """  # otherwise, type it like this
    # [END documentation]

    # [START jinja_template]
    templated_command = dedent(
        """
    {% for i in range(5) %}
        echo "{{ ds }}"
        echo "{{ macros.ds_add(ds, 7)}}"
    {% endfor %}
    """
    )

    t3 = BashOperator(
        task_id='templated',
        depends_on_past=False,
        bash_command=templated_command,
    )
    # [END jinja_template]

    t1 >> [t2, t3]
# [END tutorial]
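
You can smoke-test any single task in this DAG from the command line with airflow tasks test tutorial print_date 2021-01-01, which runs the task locally without recording state in the metadata database.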

1.2 Dynamic DAGs

from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.python import PythonOperator
import pymssql


def failure_callback(context):
    # Placeholder callback: hook your failure alerting in here
    print("Task failed: {}".format(context['task_instance'].task_id))


def success_callback(context):
    # Placeholder callback: hook your success notification in here
    print("Task succeeded: {}".format(context['task_instance'].task_id))


with DAG(
    dag_id='hello_world',
    default_args={
        'owner': 'Nan',
        'depends_on_past': False,
        'email': ['nolan@163.com'],
        'email_on_failure': True,
        'email_on_retry': True,
        #'retries': 0,
        #'retry_delay': timedelta(minutes=5),
        # 'queue': 'bash_queue',
        # 'pool': 'backfill',
        # 'priority_weight': 10,
        # 'end_date': datetime(2016, 1, 1),
        # 'wait_for_downstream': False,
        # 'sla': timedelta(hours=2),
        # 'execution_timeout': timedelta(seconds=300),
        'on_failure_callback': failure_callback,
        'on_success_callback': success_callback,
        # 'on_retry_callback': another_function,
        # 'sla_miss_callback': yet_another_function,
        # 'trigger_rule': 'all_success'
    },
    description='A simple template',
    schedule_interval=timedelta(days=1),
    start_date=datetime(2021, 1, 1),
    catchup=False,
    tags=['example'],
    concurrency=10
) as dag:
    # Connect to the database and fetch the connection records
    def getConnStatus():
        conn = pymssql.connect(host="xxxx", database='xxx', user='xxxx', password='xxxx')
        cursor = conn.cursor()
        cursor.execute("select * from xxxxx")
        results = cursor.fetchall()

        # Mock data
        #results = [(1, 'abc', 'xxx', 'xxx', 'xxx'), (2, 'bcd', 'xxx', 'xxx', 'xxx'), (3, 'xyz', 'mssql.class', 'kafka_conn', 'key_source')]
        return results

    def execFunction(**kwargs):
        # Split the record passed in via op_kwargs
        everyConn = kwargs.get("connMsg")
        # Convert the tuple to a string and strip the surrounding parentheses
        conn = str(everyConn)[2:-3]
        allMsg = conn.split(",")
        xxx = allMsg[0]
        xxx = allMsg[1]
        xxx = allMsg[2]

    # Call the function, then create one task per record
    results = getConnStatus()
    for item in results:
        name = item[1]
        print(name)  # abc
        op = PythonOperator(
            task_id="monitor_conn_{}".format(name),
            python_callable=execFunction,
            op_kwargs={'connMsg': item},
        )
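
Note that getConnStatus() runs at parse time, i.e. every time the scheduler re-parses this file, so each parse opens a database connection. Keep that query cheap (or cache its result); otherwise dynamically generated DAGs like this can noticeably slow down scheduler parsing.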

1.3 Parse Trigger config

  • Using the CLI:
    airflow trigger_dag 'example_dag_conf' -r 'run_id' --conf '{"message":"value"}'
    (in Airflow 2.x: airflow dags trigger 'example_dag_conf' -r 'run_id' --conf '{"message":"value"}')
  • Using the UI: trigger the DAG manually and supply the JSON conf in the trigger form.

def parse_config(**kwargs):
    # Read the value passed at trigger time via --conf or the UI form
    param = kwargs['dag_run'].conf['message']
    print(param)
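
A minimal sketch of wiring this callable into a task, assuming it sits inside a with DAG(...) as dag: block like the ones above (the task_id is arbitrary):

from airflow.operators.python import PythonOperator

parse_conf_task = PythonOperator(
    task_id='parse_config',
    python_callable=parse_config,
)

Triggering the DAG with the --conf shown above (or the UI form) then makes the value available as kwargs['dag_run'].conf inside the callable.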

2. Executing a jar from a DAG

import os

import jpype
import pymssql


def getConnStatus():
    conn = pymssql.connect(host="xxx.xxx.xx.xxx", database='xxx', user='xxx', password='xxx')
    cursor = conn.cursor()
    cursor.execute("select * from xxx")
    results = cursor.fetchall()
    return results

def getJVMClass():
    path = 'E:\\xxx\\airflow\\xxx\\xxxx-0.0.1-SNAPSHOT.jar'
    jar_path = os.path.join(os.path.abspath("."), path)

    # Get the path to jvm.dll
    jvmPath = jpype.getDefaultJVMPath()
    print(jvmPath)

    # Start the JVM with the jar on the classpath
    jpype.startJVM(jvmPath, "-ea", "-Djava.class.path=%s" % jar_path)
    DataCheckClass = jpype.JClass("com.xxxx.controller.DataCheckController")
    return DataCheckClass()

if __name__ == '__main__':
    datacheck = getJVMClass()
    print("start jvm")
    results = getConnStatus()
    for item in results:
        systemName = item[1]
        pipelineType = item[2]
        if pipelineType == 'DB':
            task_id = "Check_Pipeline_{}".format(systemName)
            sourceConf = item[3]
            targetConf = item[4]
            count = datacheck.compareTableDiff(str(sourceConf), str(targetConf))
            print(count)

    jpype.shutdownJVM()
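
Note that JPype can start the JVM only once per process and cannot restart it after shutdownJVM(), so the pattern above fits a standalone script better than a long-lived Airflow worker. A common alternative is to shell out to the jar from a task; a minimal sketch with a hypothetical jar path, assuming it sits inside a with DAG(...) as dag: block:

from airflow.operators.bash import BashOperator

# Hypothetical jar location and argument; adapt to the real deployment
run_jar = BashOperator(
    task_id='run_data_check_jar',
    bash_command='java -jar /opt/airflow/jars/xxxx-0.0.1-SNAPSHOT.jar {{ ds }}',
)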

3. Force failure

from airflow.exceptions import AirflowFailException

def task_to_fail():
    raise AirflowFailException("Force airflow to fail here!")

# Call task_to_fail() inside execScript based on the result of some check
op = PythonOperator(
    task_id="Check_conn_{}".format(connName),
    python_callable=execScript,
    op_kwargs={'s_num': s_num},
    dag=dag,
)
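
For context, a minimal sketch of what execScript might look like; check_connection, s_num, and the pass/fail criterion are hypothetical placeholders:

from airflow.exceptions import AirflowFailException

def check_connection(s_num):
    # Hypothetical check; replace with the real validation logic
    return s_num > 0

def execScript(**kwargs):
    s_num = kwargs['s_num']
    if not check_connection(s_num):
        # AirflowFailException fails the task immediately, skipping any remaining retries
        raise AirflowFailException("Force airflow to fail here!")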

4. DAG scheduling

4.1 Cron

  • The official docs provide a set of cron presets (Cron).
  • You can also write a custom schedule, as sketched below.
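
A few common schedule_interval values for reference; the cron presets and expressions are standard Airflow, the specific times are only examples:

from datetime import timedelta

# Equivalent ways to express a schedule (pick one per DAG):
schedule_interval = '@daily'            # cron preset: once a day at midnight
schedule_interval = '0 2 * * *'         # cron expression: every day at 02:00
schedule_interval = timedelta(hours=6)  # fixed frequency: every six hours
schedule_interval = None                # only triggered manually/externally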

4.2 start date

  • Hard-coding the start date and then setting the schedule: Airflow fires the first run at start_date + schedule_interval (the end of the first interval), and a start_date far in the past triggers catch-up runs for every missed interval.
  • Solution: keep start_date static and disable catch-up, as sketched below.
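
A minimal sketch of the fix, following the same conventions as the tutorial DAG above (the dag_id is hypothetical):

from datetime import datetime, timedelta
from airflow import DAG

with DAG(
    dag_id='static_start_date_example',  # hypothetical dag_id
    start_date=datetime(2021, 1, 1),     # static; never datetime.now()
    schedule_interval=timedelta(days=1),
    catchup=False,                       # do not backfill missed intervals
) as dag:
    pass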

5. Waken

         A person who sees the essence of a thing in one second and a person who cannot see the essence of a thing even in half a lifetime are naturally bound for different fates.
