import airflow
from airflow import DAG
from airflow.contrib.operators.databricks_operator import DatabricksSubmitRunOperator
from airflow.operators.email_operator import EmailOperator
from airflow.operators.bash_operator import BashOperator
from airflow.operators.http_operator import SimpleHttpOperator
from airflow.operators.sensors import HttpSensor
import json
from datetime import timedelta
from datetime import datetime
from airflow.models import Variable
from airflow.utils.trigger_rule import TriggerRule
args = {
'owner': 'airflow',
'email': ['test@gmail.com'],
'email_on_failure': True,
'email_on_retry': True,
'depends_on_past': False,
'start_date': airflow.utils.dates.days_ago(0),
'max_active_runs':10
}
dag = DAG(dag_id='TEST_DAG', default_args=args, schedule_interval='@once')
new_cluster = {
'spark_version': '4.0.x-scala2.11',
'node_type_id': 'Standard_D16s_v3',
'num_workers': 3,
'spark_conf':{
'spark.hadoop.javax.jdo.option.ConnectionDriverName':'org.postgresql.Driver',
.....
},
'custom_tags':{
'ApplicationName':'TEST',
.....
}
}
t1 = DatabricksSubmitRunOperator(
task_id='t1',
dag=dag,
new_cluster=new_cluster,
......
)
t2 = SimpleHttpOperator(
task_id='t2',
trigger_rule=TriggerRule.ONE_SUCCESS,
method='POST',
........
)
t2.set_upstream(t1)
t3 = SimpleHttpOperator(
task_id='t3',
trigger_rule=TriggerRule.ONE_SUCCESS,
method='POST',
.....
)
t3.set_upstream(t2)
AllTaskSuccess = EmailOperator (
dag=dag,
trigger_rule=TriggerRule.ALL_SUCCESS,
task_id="AllTaskSuccess",
to=["test@gmail.com"],
subject="All Task completed successfully",
html_content='<h3>All Task completed successfully" </h3>')
AllTaskSuccess.set_upstream([t1, t2,t3])
t1Failed = EmailOperator (
dag=dag,
trigger_rule=TriggerRule.ONE_FAILED,
task_id="t1Failed",
to=["test@gmail.com"],
subject="T1 Failed",
html_content='<h3>T1 Failed</h3>')
t1Failed.set_upstream([t1])
t2Failed = EmailOperator (
dag=dag,
trigger_rule=TriggerRule.ONE_FAILED,
task_id="t2Failed",
to=["test@gmail.com"],
subject="T2 Failed",
html_content='<h3>T2 Failed</h3>')
t2Failed.set_upstream([t2])
t3Failed = EmailOperator (
dag=dag,
trigger_rule=TriggerRule.ONE_FAILED,
task_id="t3Failed",
to=["test@gmail.com"],
subject="T3 Failed",
html_content='<h3>T3 Failed</h3>')
t3Failed.set_upstream([t3])
I managed this with the help of Airflow's TriggerRule; a sample DAG is given below:
import airflow
from airflow import DAG
from airflow.contrib.operators.databricks_operator import DatabricksSubmitRunOperator
from airflow.operators.email_operator import EmailOperator
from airflow.operators.bash_operator import BashOperator
from airflow.operators.http_operator import SimpleHttpOperator
from airflow.operators.sensors import HttpSensor
import json
from datetime import timedelta
from datetime import datetime
from airflow.models import Variable
from airflow.utils.trigger_rule import TriggerRule
args = {
'owner': 'airflow',
'email': ['test@gmail.com'],
'email_on_failure': True,
'email_on_retry': True,
'depends_on_past': False,
'start_date': airflow.utils.dates.days_ago(0),
'max_active_runs':10
}
dag = DAG(dag_id='TEST_DAG', default_args=args, schedule_interval='@once')
new_cluster = {
'spark_version': '4.0.x-scala2.11',
'node_type_id': 'Standard_D16s_v3',
'num_workers': 3,
'spark_conf':{
'spark.hadoop.javax.jdo.option.ConnectionDriverName':'org.postgresql.Driver',
.....
},
'custom_tags':{
'ApplicationName':'TEST',
.....
}
}
t1 = DatabricksSubmitRunOperator(
task_id='t1',
dag=dag,
new_cluster=new_cluster,
......
)
t2 = SimpleHttpOperator(
task_id='t2',
trigger_rule=TriggerRule.ONE_SUCCESS,
method='POST',
........
)
t2.set_upstream(t1)
t3 = SimpleHttpOperator(
task_id='t3',
trigger_rule=TriggerRule.ONE_SUCCESS,
method='POST',
.....
)
t3.set_upstream(t2)
AllTaskSuccess = EmailOperator (
dag=dag,
trigger_rule=TriggerRule.ALL_SUCCESS,
task_id="AllTaskSuccess",
to=["test@gmail.com"],
subject="All Task completed successfully",
html_content='<h3>All Task completed successfully" </h3>')
AllTaskSuccess.set_upstream([t1, t2,t3])
t1Failed = EmailOperator (
dag=dag,
trigger_rule=TriggerRule.ONE_FAILED,
task_id="t1Failed",
to=["test@gmail.com"],
subject="T1 Failed",
html_content='<h3>T1 Failed</h3>')
t1Failed.set_upstream([t1])
t2Failed = EmailOperator (
dag=dag,
trigger_rule=TriggerRule.ONE_FAILED,
task_id="t2Failed",
to=["test@gmail.com"],
subject="T2 Failed",
html_content='<h3>T2 Failed</h3>')
t2Failed.set_upstream([t2])
t3Failed = EmailOperator (
dag=dag,
trigger_rule=TriggerRule.ONE_FAILED,
task_id="t3Failed",
to=["test@gmail.com"],
subject="T3 Failed",
html_content='<h3>T3 Failed</h3>')
t3Failed.set_upstream([t3])
Trigger Rules
Though the normal workflow behavior is to trigger tasks when all their directly upstream tasks have succeeded, Airflow allows for more complex dependency settings.
All operators have a trigger_rule argument which defines the rule by which the generated task gets triggered. The default value for trigger_rule is all_success and can be defined as "trigger this task when all directly upstream tasks have succeeded". All the other rules described here are based on direct parent tasks, and are values that can be passed to any operator when creating tasks:
all_success: (default) all parents have succeeded
all_failed: all parents are in a failed or upstream_failed state
all_done: all parents are done with their execution
one_failed: fires as soon as at least one parent has failed, it does not wait for all parents to be done
one_success: fires as soon as at least one parent succeeds, it does not wait for all parents to be done
dummy: dependencies are just for show, trigger at will
Reference : https://airflow.apache.org/concepts.html
参考文档
https://stackoverflow.com/questions/51726248/airflow-dag-customized-email-on-any-of-the-task-failure
对于那些寻找使用带有EmailOperator的jinja模板的确切示例的人来说,这里有一个
# Example of using Jinja templating with the EmailOperator.
from airflow.operators.email_operator import EmailOperator
from datetime import datetime, timedelta

# The subject and body are Jinja templates rendered at runtime; the
# values supplied via params are reachable as {{ params.<key> }},
# alongside built-in context variables such as {{ ds }} and {{ ti }}.
email_task = EmailOperator(
    task_id='email_task',
    to='some@email.com',
    subject='Templated Subject: start_date {{ ds }}',
    html_content="Templated Content: content1 - {{ params.content1 }} task_key - {{ task_instance_key_str }} test_mode - {{ test_mode }} task_owner - {{ task.owner}} hostname - {{ ti.hostname }}",
    params={'content1': 'random'},
    dag=dag)
参考文档