目录
在master01和master02这两个机器搭建scheduler HA
1.安装python3.7
安装Linux系统依赖模块
[root@master02 ~]# yum -y install zlib-devel bzip2-devel openssl-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel gcc gcc-devel make mysql-devel python-devel libffi-devel unzip
安装python3.7前,一定要先安装libffi-devel,不然使用python3.7安装airflow时,会报错缺少_ctypes模块
ModuleNotFoundError: No module named '_ctypes'
安装python3.7
[root@master02 ~]# cd /usr/local
[root@master02 local]# wget https://www.python.org/ftp/python/3.7.11/Python-3.7.11.tgz
[root@master02 local]# tar -zxf Python-3.7.11.tgz
[root@master02 local]# cd Python-3.7.11
[root@master02 Python-3.7.11]# ./configure --prefix=/usr/local/python37
[root@master02 Python-3.7.11]# make
[root@master02 Python-3.7.11]# make install
设置环境变量
vim ~/.bash_profile
export PYTHON_HOME=/usr/local/python37
export PATH=$PYTHON_HOME/bin:$PATH
source ~/.bash_profile
2.安装mysql5.7
[root@master02 ~]# wget http://repo.mysql.com/mysql57-community-release-el7-10.noarch.rpm
[root@master02 ~]# yum -y install mysql57-community-release-el7-10.noarch.rpm
[root@master02 ~]# yum -y install mysql-community-server
[root@master02 ~]# systemctl start mysqld.service
从mysql日志中查看mysql的root用户密码
[root@master02 ~]# vim /var/log/mysqld.log
2023-05-04T13:43:38.504211Z 1 [Note] A temporary password is generated for root@localhost: #:tapCd>D6G<
修改mysql的root用户密码
[root@master02 ~]# mysql -uroot -h127.0.0.1 -p"#:tapCd>D6G<"
mysql> alter user 'root'@'localhost' identified by 'Root$12345678';
mysql> quit
创建airflow数据库,创建用户airflow,给予airflow数据库全部权限
[root@master02 ~]# mysql -uroot -h127.0.0.1 -p'Root$12345678'
mysql> CREATE DATABASE airflow CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
mysql> CREATE USER 'airflow' IDENTIFIED BY 'airflow';
ERROR 1819 (HY000): Your password does not satisfy the current policy requirements
mysql> SHOW VARIABLES LIKE 'validate_password%';
mysql> set global validate_password_policy=LOW;
mysql> set global validate_password_length=5;
mysql> CREATE USER 'airflow' IDENTIFIED BY 'airflow';
mysql> GRANT ALL PRIVILEGES ON airflow.* TO 'airflow';
mysql> flush privileges;
修改mysql配置
vim /etc/my.cnf
explicit_defaults_for_timestamp=ON
systemctl restart mysqld
3.安装airflow2.5.3
1.安装Linux系统依赖模块
yum -y install zlib-devel bzip2-devel openssl-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel gcc gcc-devel make mysql-devel python-devel libffi-devel unzip
2.设置全局的环境变量
vim /etc/profile
export AIRFLOW_HOME=/usr/local/airflow
source /etc/profile
3.使用pip3安装airflow2.5.3
pip3 install --upgrade pip
pip3 install "apache-airflow[celery]==2.5.3" --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.3/constraints-3.7.txt"
pip3 install "apache-airflow[password]==2.5.3" --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.3/constraints-3.7.txt"
pip3 install "apache-airflow[mysql]==2.5.3" --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.3/constraints-3.7.txt"
pip3 install flask_bcrypt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.3/constraints-3.7.txt"
pip3 install pymysql --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.3/constraints-3.7.txt"
pip3 install celery --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.3/constraints-3.7.txt"
pip3 install redis==3.5.3 --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.3/constraints-3.7.txt"
pip3 install flower --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.3/constraints-3.7.txt"
pip3 install pandas --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.3/constraints-3.7.txt"
4.配置metadata数据库
执行airflow初始化数据库命令
airflow db init
可能会报错
airflow.exceptions.AirflowConfigException: error: sqlite C library version too old (< 3.15.0).
没有关系,这里只是为了让airflow在AIRFLOW_HOME目录下生成airflow.cfg配置文件,我们不打算使用原生的sqlite数据库
接下来我们需要修改配置文件里面的数据库相关配置项
vim $AIRFLOW_HOME/airflow.cfg
修改[database]下的sql_alchemy_conn配置项
sql_alchemy_conn = mysql+pymysql://airflow:airflow@master02/airflow
再次执行airflow初始化数据库命令
airflow db init
现在已经在数据库airflow生成了相关的元数据表
airflow会自动生成admin用户,如果需要的话,可以使用airflow命令行新建用户并指定角色
airflow users create --username admin --firstname admin --lastname admin --role Admin --email terry.wu@klook.com --password admin
修改配置文件airflow.cfg
vim /usr/local/airflow/airflow.cfg
[database]
sql_alchemy_conn = mysql+pymysql://airflow:airflow@master02/airflow
[core]
executor = CeleryExecutor
default_timezone = Asia/Shanghai
[celery]
broker_url = redis://master02:6379/0
result_backend = db+mysql://airflow:airflow@master02/airflow
[webserver]
default_ui_timezone = Asia/Shanghai
启动airflow的webserver,-D表示后台运行
airflow webserver --port 8080 -D
5.搭建scheduler HA
要在master01和master02这两个机器搭建HA,首先需要对这两个机器设置免密登录
[root@master01 ~]# cd $AIRFLOW_HOME
[root@master01 airflow]# mkdir software
[root@master01 airflow]# cd software/
[root@master01 software]# wget https://github.com/teamclairvoyant/airflow-scheduler-failover-controller/archive/refs/heads/master.zip
[root@master01 software]# yum -y install unzip
[root@master01 software]# unzip master.zip
[root@master01 software]# cd airflow-scheduler-failover-controller-master/
[root@master01 airflow-scheduler-failover-controller-master]# pip3 install -e .
[root@master01 airflow-scheduler-failover-controller-master]# scheduler_failover_controller init
这时候初始化命令会在配置文件airflow.cfg最后追加新的配置模块[scheduler_failover]
需要改动[scheduler_failover]下面几个配置项
[scheduler_failover]
scheduler_nodes_in_cluster = master01,master02
metadata_service_type = SQLMetadataService
airflow_scheduler_start_command = export AIRFLOW_HOME=/usr/local/airflow;nohup /usr/local/python37/bin/airflow scheduler > /usr/local/airflow/logs/scheduler1.logs 2>&1 &
测试连通性
[root@master01 airflow]# scheduler_failover_controller test_connection
Testing Connection for host 'master01'
(True, ['Connection Succeeded\n'])
Testing Connection for host 'master02'
(True, ['Connection Succeeded\n'])
这里airflow-scheduler-failover-controller有一个bug需要修正,不然会报错
AttributeError: 'NoneType' object has no attribute '_instantiate_plugins'
vim /usr/local/airflow/software/airflow-scheduler-failover-controller-master/scheduler_failover_controller/configuration.py
把
return self.get_config("core", "SQL_ALCHEMY_CONN")
修改成
return self.get_config("database", "SQL_ALCHEMY_CONN")
打印元数据信息,测试airflow-scheduler-failover-controller是否正常
scheduler_failover_controller metadata
分别在master01和master02服务器,启动airflow-scheduler-failover-controller
nohup scheduler_failover_controller start >> /usr/local/airflow/nohup_scheduler_failover_run.log &
这时scheduler无需再手动启动,这两个机器的scheduler_failover_controller进程会选出ACTIVE节点,再由这个节点去启动scheduler
最后在worker服务器启动worker
airflow celery worker -D