1、Azkaban 3.0 中文文档
大数据平台 —— 调度系统之Azkaban
azkaban-web和azkaban-exec的一键启动和关闭(shell脚本)
解决 Azkaban Executor 启动后不会自动激活
Azkaban报错-azkaban.executor.ExecutorManagerException: No active executors found
azkaban启动和关闭脚本
#!/bin/bash
#
# Start or stop the Azkaban cluster: the executor server on every host in
# EXEC_HOSTS (reached over ssh) and the web server on the local machine.
#
# Usage: <this-script> {start|stop}
#
# Any other (or missing) argument prints the usage line and exits 1.

AZKABAN_WEB_PATH=/opt/module/azkaban/azkaban-web-server-3.84.4
AZKABAN_EXEC_PATH=/opt/module/azkaban/azkaban-exec-server-3.84.4
# Hosts that run an executor server, and the port each executor listens on.
EXEC_HOSTS=(hadoop102 hadoop103 hadoop104)
EXEC_PORT=12321

case "${1:-}" in
"start")
  # Bring up the executor server on each host.
  echo "-----------------------starting exec-------------------------"
  for host in "${EXEC_HOSTS[@]}"; do
    echo "-----$host-----"
    # The commands are fed to the remote shell on stdin. Variables in the
    # here-document are expanded locally before being sent. '<<-' strips
    # leading TAB characters (not spaces) from every line, so the body and
    # the terminator must start at column 0 or be indented with tabs only.
    # The remote 'cd' is checked so that 'rm' and the start script never run
    # in an unexpected directory.
    ssh -T "$host" <<-eof
cd "${AZKABAN_EXEC_PATH}" || exit 1
rm -f executorServerLog*
bin/start-exec.sh
exit
eof
  done

  # Give the executors time to come up before activating them.
  echo "---------------------waiting for executors-----------------------"
  sleep 30s
  echo "---------------------updata mysql columns-----------------------"
  # Former approach (disabled): mark every executor active directly in MySQL.
  #ssh hadoop102 "/usr/bin/mysql -uazkaban -p000000 -e 'update azkaban.executors set active =1'" > /dev/null 2>&1
  # Current approach: ask each executor to activate itself via its HTTP API.
  for host in "${EXEC_HOSTS[@]}"; do
    echo "---------------------$host-----------------------"
    ssh -T "$host" <<-eof
curl -G "$host:${EXEC_PORT}/executor?action=activate" && echo
exit
eof
  done

  # Start the web server from its installation directory.
  # NOTE(review): the original always cd's before running bin/start-web.sh,
  # presumably because the start script resolves paths relative to the
  # current directory - confirm before changing.
  echo "-----------------------starting web-------------------------"
  cd "${AZKABAN_WEB_PATH}" || {
    echo "cannot cd to ${AZKABAN_WEB_PATH}" >&2
    exit 1
  }
  bin/start-web.sh
  ;;
"stop")
  # Stop the web server first, then every executor server.
  echo "--------------------------stoping web-------------------------"
  "${AZKABAN_WEB_PATH}/bin/shutdown-web.sh"
  for host in "${EXEC_HOSTS[@]}"; do
    echo "-------------------------stoping executor server in $host-----------------------"
    ssh "$host" "${AZKABAN_EXEC_PATH}/bin/shutdown-exec.sh"
  done
  ;;
*)
  # Previously an unknown or missing argument was silently ignored.
  echo "Usage: $(basename "$0") {start|stop}" >&2
  exit 1
  ;;
esac
2、调度实战(先准备好log数据到hdfs,db数据到mysql)
那些年走过的azkaban的坑
gmall.flow
# Azkaban Flow 2.0 definition of the warehouse pipeline.
# Every node runs one shell script through the "command" job type;
# "dependsOn" lists the nodes that must finish before a node starts.
# ${dt} is not defined in this file - presumably it is passed in as a flow
# parameter when the flow is executed (TODO confirm).
nodes:
  # Ingestion: business DB -> HDFS, and log files on HDFS -> ODS.
  - name: mysql_to_hdfs
    type: command
    config:
      command: /home/atguigu/bin/mysql_to_hdfs.sh all ${dt}

  - name: hdfs_to_ods_log
    type: command
    config:
      command: /home/atguigu/bin/hdfs_to_ods_log.sh ${dt}

  - name: hdfs_to_ods_db
    type: command
    dependsOn:
      - mysql_to_hdfs
    config:
      command: /home/atguigu/bin/hdfs_to_ods_db.sh all ${dt}

  # ODS -> DIM / DWD.
  - name: ods_to_dim_db
    type: command
    dependsOn:
      - hdfs_to_ods_db
    config:
      command: /home/atguigu/bin/ods_to_dim_db.sh all ${dt}

  - name: ods_to_dwd_log
    type: command
    dependsOn:
      - hdfs_to_ods_log
    config:
      command: /home/atguigu/bin/ods_to_dwd_log.sh all ${dt}

  - name: ods_to_dwd_db
    type: command
    dependsOn:
      - hdfs_to_ods_db
    config:
      command: /home/atguigu/bin/ods_to_dwd_db.sh all ${dt}

  # DWD -> DWS -> DWT -> ADS; dwd_to_dws waits for all three upstream branches.
  - name: dwd_to_dws
    type: command
    dependsOn:
      - ods_to_dim_db
      - ods_to_dwd_log
      - ods_to_dwd_db
    config:
      command: /home/atguigu/bin/dwd_to_dws.sh all ${dt}

  - name: dws_to_dwt
    type: command
    dependsOn:
      - dwd_to_dws
    config:
      command: /home/atguigu/bin/dws_to_dwt.sh all ${dt}

  - name: dwt_to_ads
    type: command
    dependsOn:
      - dws_to_dwt
    config:
      command: /home/atguigu/bin/dwt_to_ads.sh all ${dt}

  # Export step; note that this command is invoked without ${dt}.
  - name: hdfs_to_mysql
    type: command
    dependsOn:
      - dwt_to_ads
    config:
      command: /home/atguigu/bin/hdfs_to_mysql.sh all
gmall.project
azkaban-flow-version: 2.0
查看各节点的ID
方案一:指定azkaban执行脚本的节点
方案二:在Executor所在所有节点部署任务所需脚本和应用。
# 分发脚本、hive、sqoop、spark、my_env.sh
[atguigu@hadoop102 ~]$ xsync /home/atguigu/bin/
[atguigu@hadoop102 ~]$ xsync /opt/module/hive
[atguigu@hadoop102 ~]$ xsync /opt/module/sqoop
[atguigu@hadoop102 ~]$ xsync /opt/module/spark
[atguigu@hadoop102 ~]$ sudo /home/atguigu/bin/xsync /etc/profile.d/my_env.sh
调度执行状态
PS:Azkaban配置
解压Azkaban部署包
[atguigu@hadoop102 azkaban]$ ll
总用量 12
drwxr-xr-x. 2 atguigu atguigu 4096 4月 18 2020 azkaban-db-3.84.4
drwxr-xr-x. 6 atguigu atguigu 4096 4月 18 2020 azkaban-exec-server-3.84.4
drwxr-xr-x. 6 atguigu atguigu 4096 4月 18 2020 azkaban-web-server-3.84.4
配置MySQL
# 登录MySQL,创建Azkaban数据库
mysql> create database azkaban;
Query OK, 1 row affected (0.00 sec)
# 创建Azkaban用户,任何主机都可以访问Azkaban,密码是000000
mysql> CREATE USER 'azkaban'@'%' IDENTIFIED BY '000000';
Query OK, 0 rows affected (0.06 sec)
# 赋予Azkaban用户增删改查权限
mysql> GRANT SELECT,INSERT,UPDATE,DELETE ON azkaban.* to 'azkaban'@'%' WITH GRANT OPTION;
Query OK, 0 rows affected (0.04 sec)
# 创建Azkaban表,完成后退出MySQL
mysql> use azkaban;
Database changed
mysql> source /opt/module/azkaban/azkaban-db-3.84.4/create-all-sql-3.84.4.sql
# 更改MySQL包大小;防止Azkaban连接MySQL阻塞
[atguigu@hadoop102 azkaban]$ cat /etc/my.cnf
# For advice on how to change settings please see
# http://dev.mysql.com/doc/refman/5.7/en/server-configuration-defaults.html
[mysqld]
# 更改MySQL包大小;防止Azkaban连接MySQL阻塞
max_allowed_packet=1024M
#skip-grant-tables
#
# Remove leading # and set to the amount of RAM for the most important data
# cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%.
# innodb_buffer_pool_size = 128M
#
# Remove leading # to turn on a very important data integrity option: logging
# changes to the binary log between backups.
# log_bin
#
# Remove leading # to set options mainly useful for reporting servers.
# The server defaults are faster for transactions and fast SELECTs.
# Adjust sizes as needed, experiment to find the optimal values.
# join_buffer_size = 128M
# sort_buffer_size = 2M
# read_rnd_buffer_size = 2M
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
# 重启MySQL
[atguigu@hadoop102 azkaban]$ sudo systemctl restart mysqld
配置Executor Server
# 编辑azkaban.properties
[atguigu@hadoop102 azkaban]$ cat /opt/module/azkaban/azkaban-exec-server-3.84.4/conf/azkaban.properties
# Azkaban Personalization Settings
azkaban.name=Test
azkaban.label=My Local Azkaban
azkaban.color=#FF3601
azkaban.default.servlet.path=/index
web.resource.dir=web/
# 修改时区
default.timezone.id=Asia/Shanghai
# Azkaban UserManager class
user.manager.class=azkaban.user.XmlUserManager
user.manager.xml.file=conf/azkaban-users.xml
# Loader for projects
executor.global.properties=conf/global.properties
azkaban.project.dir=projects
# Velocity dev mode
velocity.dev.mode=false
# Azkaban Jetty server properties.
jetty.use.ssl=false
jetty.maxThreads=25
jetty.port=8081
# Where the Azkaban web server is located
azkaban.webserver.url=http://hadoop102:8081
# mail settings
mail.sender=
mail.host=
# User facing web server configurations used to construct the user facing server URLs. They are useful when there is a reverse proxy between Azkaban web servers and users.
# enduser -> myazkabanhost:443 -> proxy -> localhost:8081
# when this parameters set then these parameters are used to generate email links.
# if these parameters are not set then jetty.hostname, and jetty.port(if ssl configured jetty.ssl.port) are used.
# azkaban.webserver.external_hostname=myazkabanhost.com
# azkaban.webserver.external_ssl_port=443
# azkaban.webserver.external_port=8081
job.failure.email=
job.success.email=
lockdown.create.projects=false
cache.directory=cache
# JMX stats
jetty.connector.stats=true
executor.connector.stats=true
# Azkaban plugin settings
azkaban.jobtype.plugin.dir=plugins/jobtypes
# Azkaban mysql settings by default. Users should configure their own username and password.
database.type=mysql
mysql.port=3306
# 修改主机
mysql.host=hadoop102
mysql.database=azkaban
mysql.user=azkaban
# 修改密码
mysql.password=000000
mysql.numconnections=100
# Azkaban Executor settings
executor.maxThreads=50
executor.flow.threads=30
# 设置端口
executor.port=12321
# 同步azkaban-exec到所有节点
[atguigu@hadoop102 azkaban]$ xsync /opt/module/azkaban/azkaban-exec-server-3.84.4/
启动executor server
[atguigu@hadoop102 azkaban-exec-server-3.84.4]$ pwd
/opt/module/azkaban/azkaban-exec-server-3.84.4
# 在三台机器上分别启动
[atguigu@hadoop102 azkaban-exec-server-3.84.4]$ bin/start-exec.sh
[atguigu@hadoop103 azkaban-exec-server-3.84.4]$ bin/start-exec.sh
[atguigu@hadoop104 azkaban-exec-server-3.84.4]$ bin/start-exec.sh
# azkaban启动成功标识
[atguigu@hadoop103 azkaban-exec-server-3.84.4]$ ll
-rw-rw-r--. 1 atguigu atguigu 5 5月 14 21:31 executor.port
激活executor
[atguigu@hadoop102 azkaban-exec-server-3.84.4]$ curl -G "hadoop102:12321/executor?action=activate" && echo
{"status":"success"}
[atguigu@hadoop103 azkaban-exec-server-3.84.4]$ curl -G "hadoop103:12321/executor?action=activate" && echo
{"status":"success"}
[atguigu@hadoop104 azkaban-exec-server-3.84.4]$ curl -G "hadoop104:12321/executor?action=activate" && echo
{"status":"success"}
配置Web Server
[atguigu@hadoop102 azkaban-exec-server-3.84.4]$ cat /opt/module/azkaban/azkaban-web-server-3.84.4/conf/azkaban.properties
# Azkaban Personalization Settings
azkaban.name=Test
azkaban.label=My Local Azkaban
azkaban.color=#FF3601
azkaban.default.servlet.path=/index
web.resource.dir=web/
# 更改时区
default.timezone.id=Asia/Shanghai
# Azkaban UserManager class
user.manager.class=azkaban.user.XmlUserManager
user.manager.xml.file=conf/azkaban-users.xml
# Loader for projects
executor.global.properties=conf/global.properties
azkaban.project.dir=projects
# Velocity dev mode
velocity.dev.mode=false
# Azkaban Jetty server properties.
jetty.use.ssl=false
jetty.maxThreads=25
jetty.port=8081
# Azkaban Executor settings
# mail settings
mail.sender=
mail.host=
# User facing web server configurations used to construct the user facing server URLs. They are useful when there is a reverse proxy between Azkaban web servers and users.
# enduser -> myazkabanhost:443 -> proxy -> localhost:8081
# when this parameters set then these parameters are used to generate email links.
# if these parameters are not set then jetty.hostname, and jetty.port(if ssl configured jetty.ssl.port) are used.
# azkaban.webserver.external_hostname=myazkabanhost.com
# azkaban.webserver.external_ssl_port=443
# azkaban.webserver.external_port=8081
job.failure.email=
job.success.email=
lockdown.create.projects=false
cache.directory=cache
# JMX stats
jetty.connector.stats=true
executor.connector.stats=true
# Azkaban mysql settings by default. Users should configure their own username and password.
database.type=mysql
mysql.port=3306
# 更改主机
mysql.host=hadoop102
mysql.database=azkaban
mysql.user=azkaban
# 更改密码
mysql.password=000000
mysql.numconnections=100
#Multiple Executor
azkaban.use.multiple.executors=true
#StaticRemainingFlowSize:正在排队的任务数;
#CpuStatus:CPU占用情况
#MinimumFreeMemory:内存占用情况。测试环境,必须将MinimumFreeMemory删除掉,否则它会认为集群资源不够,不执行
azkaban.executorselector.filters=StaticRemainingFlowSize,CpuStatus
azkaban.executorselector.comparator.NumberOfAssignedFlowComparator=1
azkaban.executorselector.comparator.Memory=1
azkaban.executorselector.comparator.LastDispatched=1
azkaban.executorselector.comparator.CpuUsage=1
[atguigu@hadoop102 conf]$ cat /opt/module/azkaban/azkaban-web-server-3.84.4/conf/azkaban-users.xml
<azkaban-users>
<user groups="azkaban" password="azkaban" roles="admin" username="azkaban"/>
<user password="metrics" roles="metrics" username="metrics"/>
<!--添加atguigu用户-->
<user password="atguigu" roles="metrics,admin" username="atguigu"/>
<role name="admin" permissions="ADMIN"/>
<role name="metrics" permissions="METRICS"/>
</azkaban-users>
启动web server
[atguigu@hadoop102 azkaban-web-server-3.84.4]$ pwd
/opt/module/azkaban/azkaban-web-server-3.84.4
[atguigu@hadoop102 azkaban-web-server-3.84.4]$ bin/start-web.sh
使用azkaban-users.xml中配置的用户(例如上面添加的atguigu,密码atguigu)登录:http://hadoop102:8081