"""Airflow DAG: pull data from DataHub into a MySQL database.

Demonstrates: loading DataHub data into the DB via Airflow, modifying the
data with SQL, packaging the table as a CSV file and uploading it to MinIO,
and overriding the task on-success e-mail notification.
"""

import smtplib
import pendulum
from email.mime.text import MIMEText
from email.header import Header
from airflow import DAG
from airflow.utils.dates import days_ago
from datahub.operators.datahub_to_rdb import DatahubToRDBOperator
from datetime import datetime, timedelta
from airflow.operators.python import PythonOperator
from airflow.operators.dummy import DummyOperator
from airflow.providers.mysql.operators.mysql import MySqlOperator
import pandas as pd
import boto3
from airflow.hooks.mysql_hook import MySqlHook
import tempfile
from airflow.exceptions import AirflowException
import math


def readdata():
    """Read the ``employeeinfo`` table into a DataFrame, export it as a CSV
    file and upload the file to a MinIO (S3-compatible) bucket.

    Raises:
        AirflowException: if reading the table, writing the CSV, or the
            upload fails.
    """
    # Build a SQLAlchemy engine from the Airflow MySQL connection.
    mysql_hook = MySqlHook(mysql_conn_id="数据库连接id")
    sqlalchemy_engine = mysql_hook.get_sqlalchemy_engine()
    try:
        df = pd.read_sql("SELECT *   FROM  employeeinfo  ", sqlalchemy_engine)
        print(df.head())
        print('--------------')
        print(df.info())

        # S3/MinIO client configuration.
        s3_resource = boto3.resource(
            's3',
            endpoint_url='minio的网址',
            aws_access_key_id='minio账号',
            aws_secret_access_key='minio密码',
            region_name='us-east-1')  # region code

        # Object key (file name) the CSV will get inside the bucket.
        my_object_key_01 = "test1.csv"

        # Write the CSV to a temporary file instead of the worker's CWD so
        # concurrent runs cannot clobber each other's file and nothing is
        # left behind; the file is deleted when the context exits.
        with tempfile.NamedTemporaryFile(suffix=".csv") as tmp:
            # index=False: do not emit the DataFrame index column.
            df.to_csv(tmp.name, encoding='utf_8_sig', index=False)
            # 'uploadfiles' is the target MinIO bucket name.
            s3_resource.Object('uploadfiles', my_object_key_01).upload_file(tmp.name)
    except Exception as e:
        raise AirflowException(f"read table to dataframe fail:{e}")




def send_email(mail_msg):
    """Send *mail_msg* as an HTML e-mail through the (unauthenticated) SMTP
    relay on port 25.

    Args:
        mail_msg: HTML body of the message.

    Best-effort: SMTP failures are logged, not raised, so a notification
    problem never fails the Airflow task itself.
    """
    # ---------- sender
    mail_host = "邮件服务器"  # SMTP server host
    sender = 'airflow@mail.com'  # sender address

    # ---------- receivers
    receivers = ['xxxx@mail.com']  # recipient list (QQ mail or any other)
    message = MIMEText(mail_msg, 'html', 'utf-8')
    # Display names for sender and receiver.
    message['From'] = Header("airflow")
    message['To'] = Header("xxxx")

    subject = 'Airflow 任务告警'
    message['Subject'] = Header(subject, 'utf-8')

    smtp_obj = None
    try:
        smtp_obj = smtplib.SMTP()
        smtp_obj.connect(mail_host, 25)    # 25 is the standard SMTP port
        smtp_obj.sendmail(sender, receivers, message.as_string())
    except smtplib.SMTPException:
        print("Error: 无法发送邮件")
    finally:
        # Always close the connection (the original leaked it).
        if smtp_obj is not None:
            try:
                smtp_obj.quit()
            except smtplib.SMTPException:
                pass


def send_email_fun(msg):
    """Thin indirection over :func:`send_email`; kept so existing callers
    of this name continue to work."""
    return send_email(msg)


def success_callback(context):
    """Airflow ``on_success_callback``: e-mail an HTML success report.

    Args:
        context: Airflow callback context dict; only ``dag`` and
            ``task_instance`` are read from it.
    """
    dag_id = context['dag'].dag_id
    task_id = context['task_instance'].task_id
    # Use an explicit Asia/Shanghai timezone instead of the original
    # hard-coded +28800s (+8h) offset, which was correct only when the
    # server clock ran in UTC. Microseconds and tzinfo are stripped so the
    # rendered string keeps the plain "YYYY-MM-DD HH:MM:SS" shape.
    now_shanghai = datetime.now(tz=pendulum.timezone("Asia/Shanghai"))
    current_time = now_shanghai.replace(microsecond=0, tzinfo=None)
    mail_msg = f"""
        <p>日期:{current_time}</p>
        <p>DAG ID:{dag_id}...</p>
        <p>Task id:{task_id}...</p>
        <P>SUCCESS</P>
        """
    send_email_fun(mail_msg)




# SQL executed by the ``sql_update`` task: patch one employee's department
# id (placeholder values — replace before use).
sqls="""
 update employeeinfo  SET deptid='xxx'
 WHERE emplid='xxxx';
"""

# def exequtesql(host,dbname,user,password,sqls):
#     db_conn = pymysql.connect(host=host, dbname=dbname, user=user, password=password)
#     print('Connect  database successfully!')
#     # 创建连接数据库的游标
#     cur = db_conn.cursor()
#     # 执行sql语句
#     cur.execute(sqls)
#     cur.close()
#     print('exequte sql success')


# Timezone used for the DAG's schedule-related timestamps.
local_tz = pendulum.timezone("Asia/Shanghai")
start_date = datetime(2022, 7, 14, 15, 55, 19, tzinfo=local_tz)

default_args = {
    'owner':'xxxx', # DAG owner/developer, e.g. employee 8703147
    # 'retries': 3, # maximum retry time
    # 'retry_delay': timedelta(minutes=2), # retry interval time
    'email_on_failure': True, # sending error notification or not
    'email': ['xxxx@mail.com'],# notification receiver
    # NOTE(review): this start_date appears to be overridden by the DAG's
    # own start_date=days_ago(2) argument below — confirm which is intended.
    'start_date': start_date,
    # Custom e-mail notification fired after each successful task.
    'on_success_callback': success_callback,
}

# DAG definition. NOTE(review): start_date here takes precedence over the
# one in default_args — confirm which of the two is intended.
dag = DAG(
    dag_id='dag的id,自定义',
    schedule_interval=None,  # no schedule: manual/external trigger only
    start_date=days_ago(2),
    catchup=False,  # do not back-fill missed runs
    default_args=default_args,
    access_control={
        '租户id': {'can_read', 'can_edit'}  # owning team (tenant id) and its DAG permissions
    },
    tags=['test'])

# Ingest task: pull the subscribed DataHub topic into the MySQL table.
con_whq_s3tohub_test_task = DatahubToRDBOperator(
        task_id='employee_test',
        datahub_conn_id='datahub的id',  # connection id holding the DataHub credentials
        datahub_topic='订阅的topic',  # topic to subscribe to
        # datahub_topic_version=1,  # pin a specific schema version
        rdb_type='mysql',  # target database type
        rdb_conn_id='数据库连接id',  # connection id of the target database
        table_name='employeeinfo',  # target table
        # datahub_batch_id = '20210908060505182', # fetch one specific batch
        dag=dag)





# Task: run the UPDATE statement defined in ``sqls`` against MySQL.
sql_update = MySqlOperator(
    task_id='sql_update',
    mysql_conn_id='数据库连接id',
    sql=sqls,
    autocommit=True,  # commit the UPDATE immediately
    dag=dag,
)
# task_notify = DummyOperator(task_id='notify', dag=dag)
# task dependancy

def print_success(**context):
    """Final task callable: log that the pipeline reached the end."""
    print("Mail is send success")


# BUG FIX: the original bound this operator to the name ``success_callback``,
# shadowing the callback function of the same name defined above. Renamed so
# the module keeps exactly one meaning per name.
notify_success = PythonOperator(
    task_id='send_email',
    python_callable=print_success,
    provide_context=True,  # Airflow 1.x-style flag, kept as-is
    dag=dag,
)

# Task: export the table to CSV and upload it to MinIO.
getdata = PythonOperator(
    task_id='getdata',
    python_callable=readdata,
    dag=dag,
)

# Pipeline: ingest from DataHub -> patch rows -> export/upload -> notify.
con_whq_s3tohub_test_task >> sql_update >> getdata >> notify_success
# (removed: CSDN web-page boilerplate accidentally captured with the source)