Azkaban
简介
Azkaban是由LinkedIn开发的批处理工作流作业调度器,用于运行Hadoop作业。Azkaban通过作业之间的依赖关系确定执行顺序,并提供易于使用的Web用户界面来维护和跟踪您的工作流。
特点
- 兼容任何版本的Hadoop
- 易于使用的Web UI
- 通过Web界面和HTTP简单地上传工作流
- 调度工作流程
- 模块化和可插拔
- 身份验证和授权
- 跟踪用户操作
- 有关失败和成功的电子邮件提醒
- SLA告警和超时自动终止(kill)
- 重试失败的作业
安装
(1)Git下载源码
git clone https://github.com/azkaban/azkaban.git
(2)在Azkaban目录下,使用gradlew构建并生成tar包
./gradlew distTar
(3)解压Azkaban的db、exec-server和web-server压缩包,并重命名
(4)将azkaban-db-3.73.0的create-all-sql-0.1.0-SNAPSHOT.sql导入mysql
(5)生成keystore证书
keytool -keystore keystore -alias jetty -genkey -keyalg RSA
(6)将keystore证书移动到azkaban-web-server-3.73.0文件夹下
mv keystore azkaban-web-server-3.73.0/
(7)时间同步配置
sudo date -s '2019-05-05 21:25:00'
hwclock -w
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
(8)配置web-server的azkaban.properties文件
# Azkaban Personalization Settings
azkaban.name=Test
azkaban.label=My Local Azkaban
azkaban.color=#FF3601
azkaban.default.servlet.path=/index
web.resource.dir=web/
default.timezone.id=Asia/Shanghai
# Azkaban UserManager class
user.manager.class=azkaban.user.XmlUserManager
user.manager.xml.file=/usr/local/azkaban/azkaban-web-server/conf/azkaban-users.xml
# Loader for projects
executor.global.properties=/usr/local/azkaban/azkaban-web-server/conf/global.properties
azkaban.project.dir=projects
# Velocity dev mode
velocity.dev.mode=false
# Azkaban Jetty server properties.
jetty.use.ssl=true
jetty.maxThreads=25
jetty.ssl.port=8443
jetty.port=8081
jetty.keystore=/usr/local/azkaban/azkaban-web-server/keystore
jetty.password=123456
jetty.keypassword=123456
jetty.truststore=/usr/local/azkaban/azkaban-web-server/keystore
jetty.trustpassword=123456
# Azkaban Executor settings
# mail settings
mail.sender=strikefreedom2019@163.com
mail.host=smtp.163.com
# User facing web server configurations used to construct the user facing server URLs. They are useful when there is a reverse proxy between Azkaban web servers and users.
# enduser -> myazkabanhost:443 -> proxy -> localhost:8081
# When these parameters are set, they are used to generate email links.
# If they are not set, jetty.hostname and jetty.port (or jetty.ssl.port if SSL is configured) are used instead.
# azkaban.webserver.external_hostname=myazkabanhost.com
# azkaban.webserver.external_ssl_port=443
# azkaban.webserver.external_port=8081
job.failure.email=strikefreedom2019@163.com
job.success.email=strikefreedom2019@163.com
lockdown.create.projects=false
cache.directory=cache
# JMX stats
jetty.connector.stats=true
executor.connector.stats=true
# Azkaban mysql settings by default. Users should configure their own username and password.
database.type=mysql
mysql.port=3306
mysql.host=hadoop1
mysql.database=azkaban
mysql.user=root
mysql.password=root
mysql.numconnections=100
#Multiple Executor
azkaban.use.multiple.executors=true
azkaban.executorselector.filters=StaticRemainingFlowSize,MinimumFreeMemory,CpuStatus
azkaban.executorselector.comparator.NumberOfAssignedFlowComparator=1
azkaban.executorselector.comparator.Memory=1
azkaban.executorselector.comparator.LastDispatched=1
azkaban.executorselector.comparator.CpuUsage=1
(9)配置web-server的azkaban-users.xml文件
<azkaban-users>
<user groups="azkaban" password="azkaban" roles="admin" username="azkaban"/>
<user password="metrics" roles="metrics" username="metrics"/>
<user password="admin" roles="admin,metrics" username="admin"/>
<role name="admin" permissions="ADMIN"/>
<role name="metrics" permissions="METRICS"/>
</azkaban-users>
(10)配置exec-server的azkaban.properties文件
# Azkaban Personalization Settings
azkaban.name=Test
azkaban.label=My Local Azkaban
azkaban.color=#FF3601
azkaban.default.servlet.path=/index
web.resource.dir=web/
default.timezone.id=Asia/Shanghai
# Azkaban UserManager class
user.manager.class=azkaban.user.XmlUserManager
user.manager.xml.file=/usr/local/azkaban/azkaban-exec-server/conf/azkaban-users.xml
# Loader for projects
executor.global.properties=/usr/local/azkaban/azkaban-exec-server/conf/global.properties
azkaban.project.dir=projects
# Velocity dev mode
velocity.dev.mode=false
# Azkaban Jetty server properties.
jetty.maxThreads=25
jetty.ssl.port=8443
jetty.port=8081
jetty.keystore=keystore
jetty.password=123456
jetty.keypassword=123456
jetty.truststore=keystore
jetty.trustpassword=123456
# Where the Azkaban web server is located
azkaban.webserver.url=http://localhost:8081
# mail settings
mail.sender=strikefreedom2019@163.com
mail.host=smtp.163.com
# User facing web server configurations used to construct the user facing server URLs. They are useful when there is a reverse proxy between Azkaban web servers and users.
# enduser -> myazkabanhost:443 -> proxy -> localhost:8081
# When these parameters are set, they are used to generate email links.
# If they are not set, jetty.hostname and jetty.port (or jetty.ssl.port if SSL is configured) are used instead.
# azkaban.webserver.external_hostname=myazkabanhost.com
# azkaban.webserver.external_ssl_port=443
# azkaban.webserver.external_port=8081
job.failure.email=strikefreedom2019@163.com
job.success.email=strikefreedom2019@163.com
lockdown.create.projects=false
cache.directory=cache
# JMX stats
jetty.connector.stats=true
executor.connector.stats=true
# Azkaban plugin settings
azkaban.jobtype.plugin.dir=/usr/local/azkaban/azkaban-exec-server/plugins/jobtypes
# Azkaban mysql settings by default. Users should configure their own username and password.
database.type=mysql
mysql.port=3306
mysql.host=hadoop1
mysql.database=azkaban
mysql.user=root
mysql.password=root
mysql.numconnections=100
# Azkaban Executor settings
executor.maxThreads=50
executor.port=12321
executor.flow.threads=30
(11)启动exec-server
(12)启动web-server
(13)在浏览器中访问https://IP地址:8443,账号和密码均为admin