Airflow 安装与使用(Python 环境下)

1.安装环境

CentOS-6.5

Python-2.7.12

setuptools-29.0.1

pip-9.0.1

2.编译Python

sudo yum install -y gcc

sudo yum install -y gcc-c++

sudo yum install -y wget

sudo yum install -y mysql

sudo yum install -y mysql-devel

sudo yum install -y python-devel

sudo yum install -y zlib-devel

sudo yum install -y openssl-devel

sudo yum install -y sqlite-devel

wget https://www.python.org/ftp/python/2.7.12/Python-2.7.12.tgz

sudo mkdir /usr/local/python27

sudo tar zxfv Python-2.7.12.tgz -C /usr/local/

cd /usr/local/Python-2.7.12/

./configure --prefix=/usr/local/python27

make

make install

sudo mv /usr/bin/python /usr/bin/python2.6

sudo ln -sf /usr/local/python27/bin/python /usr/bin/python2.7

vim /usr/bin/yum

#!/usr/bin/python2.6

vim /etc/profile

export PYTHON_HOME=/usr/local/python27

export PATH=$PYTHON_HOME/bin:$PATH

wget https://pypi.python.org/packages/59/88/2f3990916931a5de6fa9706d6d75eb32ee8b78627bb2abaab7ed9e6d0622/setuptools-29.0.1.tar.gz#md5=28ecfd0f2574b489b9a18343879a7324

tar zxfv setuptools-29.0.1.tar.gz

cd setuptools-29.0.1

python setup.py install

wget https://pypi.python.org/packages/11/b6/abcb525026a4be042b486df43905d6893fb04f05aac21c32c638e939e447/pip-9.0.1.tar.gz#md5=35f01da33009719497f01a4ba69d63c9

tar zxfv pip-9.0.1.tar.gz

cd pip-9.0.1

python setup.py install

pip install --upgrade pip

wget https://pypi.python.org/packages/a5/e9/51b544da85a36a68debe7a7091f068d802fc515a3a202652828c73453cad/MySQL-python-1.2.5.zip#md5=654f75b302db6ed8dc5a898c625e030c

unzip MySQL-python-1.2.5.zip

cd MySQL-python-1.2.5

python setup.py install

#第三方包 /usr/local/python27/lib/python2.7/site-packages

3.安装

airflow通过pip可以方便的安装到系统中。

# airflow needs a home, ~/airflow is the default,

# but you can lay foundation somewhere else if you prefer

# (optional)

export AIRFLOW_HOME=/usr/local/airflow

# install from pypi using pip

pip install airflow

pip install airflow[hive]

# initialize the database

airflow initdb

# start the web server, default port is 8080

airflow webserver -p 8080

4.设置mysql为元数据库

#首先要安装mysql客户端

sudo yum install -y mysql

sudo yum install -y mysql-devel

CREATE USER airflow;

CREATE DATABASE airflow;

CREATE DATABASE celery_result_airflow;

GRANT all privileges on airflow.* TO 'airflow'@'%' IDENTIFIED BY 'airflow';

GRANT all privileges on celery_result_airflow.* TO 'airflow'@'%' IDENTIFIED BY 'airflow';

#安装mysql模块

wget https://pypi.python.org/packages/a5/e9/51b544da85a36a68debe7a7091f068d802fc515a3a202652828c73453cad/MySQL-python-1.2.5.zip#md5=654f75b302db6ed8dc5a898c625e030c

unzip MySQL-python-1.2.5.zip

cd MySQL-python-1.2.5

python setup.py install

#在airflow的配置文件中配置mysql为元数据的存储库

sudo vi $AIRFLOW_HOME/airflow.cfg

#更改数据库链接:

sql_alchemy_conn = mysql://airflow:airflow@localhost:3306/airflow

#对应字段解释如下:

dialect+driver://username:password@host:port/database

#初始化元数据库

airflow initdb

#重置元数据库

airflow resetdb

5.安装登录模块

#安装password模块

pip install airflow[password]

#在airflow的配置文件中修改需要认证

sudo vi $AIRFLOW_HOME/airflow.cfg

[webserver]

authenticate = True

filter_by_owner = True

auth_backend = airflow.contrib.auth.backends.password_auth

运行以下代码将用户名密码写入元数据库中

import airflow

from airflow import models, settings

from airflow.contrib.auth.backends.password_auth import PasswordUser

user = PasswordUser(models.User())

user.username = 'quzhengpeng'

user.email = 'quzhengpeng@163.com'

user.password = 'quzhengpeng'

session = settings.Session()

session.add(user)

session.commit()

session.close()

exit()

5.启动守护进程

启动后台守护进程了之后,Airflow才能实时监控任务的调度情况。将任务脚本放到${AIRFLOW_HOME}/dags下在web UI 就能看到任务执行情况。

airflow scheduler

6.启动web服务

#启动web进程

airflow webserver -p 8080

#关闭CentOS6的防火墙

sudo service iptables stop

#关闭CentOS6的SELinux

setenforce 0

#关闭CentOS7的防火墙

systemctl stop firewalld.service

#禁止firewall开机启动

systemctl disable firewalld.service

Celery+MySQL

#Celery文档 http://docs.jinkan.org/docs/celery/index.html

#Celery4.0.0在airflow中有一些问题,所以安装Celery3

pip install -U Celery==3.1.24

pip install airflow[celery]

修改配置文件

vi airflow.cfg

[core]

executor = CeleryExecutor

[celery]

broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow

celery_result_backend = db+mysql://airflow:airflow@localhost:3306/airflow

启动airflow

airflow webserver -p 8080

airflow scheduler

#以非root用户运行

airflow worker

#启动Celery WebUI 查看celery任务

airflow flower

http://localhost:5555/

Celery+RabbitMQ

wget http://www.rabbitmq.com/releases/rabbitmq-server/v3.6.5/rabbitmq-server-3.6.5-1.noarch.rpm

#安装RabbitMQ的依赖包

yum install erlang

yum install socat

#如果下载了rabbitmq的yum源 sudo yum install -y rabbitmq-server

rpm -ivh rabbitmq-server-3.6.5-1.noarch.rpm

启动RabbitMQ服务

#启动rabbitmq服务

sudo service rabbitmq-server start

#或者

sudo rabbitmq-server

#添加 -detached 属性来让它在后台运行(注意:只有一个破折号)

sudo rabbitmq-server -detached

#设置开机启动rabbitmq服务

chkconfig rabbitmq-server on

#永远不要用 kill 停止 RabbitMQ 服务器,而是应该用 rabbitmqctl 命令

sudo rabbitmqctl stop

设置RabbitMQ

#创建一个RabbitMQ用户

rabbitmqctl add_user airflow airflow

#创建一个RabbitMQ虚拟主机

rabbitmqctl add_vhost vairflow

#将这个用户赋予admin的角色

rabbitmqctl set_user_tags airflow admin

#允许这个用户访问这个虚拟主机

rabbitmqctl set_permissions -p vairflow airflow ".*" ".*" ".*"

# no usage

rabbitmq-plugins enable rabbitmq_management

修改airflow配置文件支持Celery

vi $AIRFLOW_HOME/airflow.cfg

#更改Executor为CeleryExecutor

executor = CeleryExecutor

#更改broker_url

broker_url = amqp://airflow:airflow@localhost:5672/vairflow

Format explanation: transport://userid:password@hostname:port/virtual_host

#更改celery_result_backend

celery_result_backend = amqp://airflow:airflow@localhost:5672/vairflow

Format explanation: transport://userid:password@hostname:port/virtual_host

安装airflow的celery和rabbitmq模块

pip install airflow[celery]

pip install airflow[rabbitmq]

airflow使用DAG(Directed Acyclic Graph,有向无环图)来管理作业流。

#创建DAG

from datetime import datetime, timedelta

from airflow.models import DAG

args = {

'owner': 'airflow',

'start_date': seven_days_ago,

'email': ['airflow@airflow.com'],

'email_on_failure': True,

'email_on_retry': True,

'retries': 3,

'retries_delay': timedelta(seconds=60),

'depends_on_past': True

}

dag = DAG(

dag_id='dag',

default_args=args,

schedule_interval='0 0 * * *',

dagrun_timeout=timedelta(minutes=60)

)

创建任务将任务添加到DAG中

from airflow.operators.bash_operator import BashOperator

from airflow.operators.dummy_operator import DummyOperator

demo = DummyOperator(

task_id='demo',

dag=dag

)

last_execute = BashOperator(

task_id='last_execute',

bash_command='echo 1',

dag=dag

)

配置任务的依赖关系

demo.set_downstream(last_execute)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值