目录
一、概述
azkaban github地址:https://github.com/azkaban/azkaban
自定义构建azkaban,使用分布式的方案在k8s部署。自定义构建的azkaban实现的功能有:
- 设置镜像时区为中国上海,从华为云下载oracle-jdk使用
- azkaban执行器exec和web镜像共用,通过启动参数实现启动exec或者web
- 通过configMap实现azkaban 配置文件挂载,挂载azkaban-exec执行器配置文件、挂载azkaban-web管理web的配置文件以及挂载azkaban-web 用户配置文件。
- 启动azkaban-exec执行器自动完成实例注册(自动将exec实例注册到数据库)
- 关闭azkaban-exec执行器,自动从数据库记录注销实例记录。
- 可自行根据业务情况对azkaban-exec执行器实例数做伸缩调整
- 使用非root用户运行azkaban,更加安全
二、构建azkaban镜像的必要文件
- azkaban-exec-server-3.91.0.tar.gz
- azkaban-web-server-3.91.0.tar.gz
- Dockerfile
- start-azkaban.sh
说明: azkaban 建库建表脚本,可在azkaban打包后的文件获取。建库建表脚本是azkaban运行的必要条件;而不是构建azkaban镜像的必要条件。
三、azkaban构建文件和部署文件
1、azkaban 构建镜像 Dockerfile文件
备注:若是业务需要,你可以修改Dockerfile, 在Dockerfile 新增一个用于运行业务应用的目录,并且注意业务应用所在目录的用户属组权限。
## cat Dockerfile
# Azkaban 3.91.0 image: one image serves both the executor and the web server;
# the role is chosen by the argument passed to start-azkaban.sh.
# The "AS BASE" stage alias is kept as-is, although no later stage refers to it.
FROM centos/centos:7.9.2009 AS BASE
LABEL "作者"="@tudou" "date"="2023-05-09" "Azkaban version"="3.91.0-191"

# Set the timezone to Asia/Shanghai and install Oracle JDK 8 from the Huawei Cloud mirror.
RUN rm -rf /etc/localtime \
    && ln -s /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
    && yum install -y https://repo.huaweicloud.com/java/jdk/8u202-b08/jdk-8u202-linux-x64.rpm

# Replace the yum repositories with the Tencent Cloud mirror.
RUN rm -rf /etc/yum.repos.d/*.repo \
    && curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.cloud.tencent.com/repo/centos7_base.repo
#   && curl -sS https://downloads.mariadb.com/MariaDB/mariadb_repo_setup | bash
# MariaDB-client / mysql are intentionally NOT installed; keep the line above commented out.

# Install helper tools, then drop the yum cache and metadata.
# telnet is only for network troubleshooting and may be omitted.
RUN set -eux \
    && yum clean all \
    && yum makecache \
    && yum install -y procps telnet \
#   && yum install -y MariaDB-client \
    && yum clean all \
    && rm -rf /var/lib/yum/*

######################################################################################
# Files that must be present in the build context:
#   .
#   ├── azkaban-exec-server-3.91.0.tar.gz
#   ├── azkaban-web-server-3.91.0.tar.gz
#   ├── Dockerfile
#   └── start-azkaban.sh
######################################################################################
# Copy the build context into the image.
# NOTE: the later "rm -rf /tmp/*" runs in a separate layer, so it hides these
# files but does not shrink the image.
COPY . /tmp/

# Unpack both servers and expose them through version-independent symlinks.
# Adjust the version numbers below to the release you actually build.
RUN mkdir -p /opt/azkaban/conf \
    && tar -xvf /tmp/azkaban-exec-server-3.91.0.tar.gz -C /opt/azkaban \
    && ln -s /opt/azkaban/azkaban-exec-server-3.91.0 /opt/azkaban/azkaban-exec \
    && tar -xvf /tmp/azkaban-web-server-3.91.0.tar.gz -C /opt/azkaban \
    && ln -s /opt/azkaban/azkaban-web-server-3.91.0 /opt/azkaban/azkaban-web \
    && mv /tmp/start-azkaban.sh /usr/bin \
    && chmod a+x /usr/bin/start-azkaban.sh \
    && rm -rf /tmp/*

# Create a dedicated runtime user so Azkaban does not run as root.
# "user:group" is used instead of the deprecated "user.group" chown syntax.
RUN useradd -r azkaban -s /bin/bash \
    && chown -R azkaban:azkaban /opt/azkaban \
    && chown -R azkaban:azkaban /usr/bin/start-azkaban.sh

WORKDIR /opt/azkaban/

# The *_HOME variables must be defined in an earlier ENV instruction than PATH:
# within a single ENV instruction every ${VAR} is expanded with the value it had
# BEFORE that instruction, so defining and using them together left PATH with
# ":/bin:/bin" instead of the Azkaban bin directories.
ENV AZKABAN_EXEC_HOME=/opt/azkaban/azkaban-exec \
    AZKABAN_WEB_HOME=/opt/azkaban/azkaban-web
ENV PATH=${PATH}:${AZKABAN_EXEC_HOME}/bin:${AZKABAN_WEB_HOME}/bin

# Runtime user.
USER azkaban
ENTRYPOINT ["start-azkaban.sh"]
CMD [ "exec" ]
2、azkaban启动文件 start-azkaban.sh
## cat start-azkaban.sh
#!/bin/bash
#creat by tudou
#date 2023-05-09
#######################################
# Copy the mounted Azkaban property files into each server's conf directory.
# A file that is not present is silently skipped.
# Globals:   AZKABAN_EXEC_HOME (read), AZKABAN_WEB_HOME (read)
# Arguments: $1 - optional directory holding the mounted files
#                 (default: /opt/azkaban/conf)
# Outputs:   one progress line on stdout per file copied
#######################################
funReplaceENV(){
  local conf_dir=${1:-/opt/azkaban/conf}

  if [ -f "${conf_dir}/azkaban-exec.properties" ]; then
    echo "Move exec configfile(azkaban.properties) to ./conf/"
    cp -f "${conf_dir}/azkaban-exec.properties" "${AZKABAN_EXEC_HOME}/conf/azkaban.properties"
  fi
  if [ -f "${conf_dir}/azkaban-web.properties" ]; then
    echo "Move web configfile(azkaban.properties) to ./conf/"
    cp -f "${conf_dir}/azkaban-web.properties" "${AZKABAN_WEB_HOME}/conf/azkaban.properties"
  fi
}
#######################################
# Start the Azkaban executor, activate it, then block until it stops.
# Globals:   AZKABAN_EXEC_HOME (read)
# Outputs:   executor output is passed through to stdout
# Returns:   1 if the executor home cannot be entered, otherwise the exit
#            status of the executor pipeline
#######################################
funStartExec(){
  local pipeline_pid fqdn try

  echo "start azkaban exec!"
  cd "${AZKABAN_EXEC_HOME}" || return 1
  funReplaceENV

  # Run the pipeline in the background. In the foreground it stays open for as
  # long as the executor writes to it, so the activation call below would only
  # be reached after the executor had already stopped.
  bash ./bin/internal/internal-start-executor.sh 2>&1 | tee &
  pipeline_pid=$!

  # `hostname -A` separates names with spaces; keep only the first name so the
  # URL contains no whitespace.
  fqdn=$(hostname -A | awk '{print $1}')

  # The executor needs some time before it accepts requests; retry instead of
  # relying on one fixed delay. Give up early if the pipeline has already died.
  for ((try = 0; try < 30; try++)); do
    kill -0 "${pipeline_pid}" 2>/dev/null || break
    if curl -fsS "http://${fqdn}:12321/executor?action=activate"; then
      break
    fi
    sleep 2
  done

  # Keep the caller (and therefore the container) alive while the executor runs.
  wait "${pipeline_pid}"
}
#######################################
# Start the Azkaban web server and block while its output pipeline is open.
# Globals:   AZKABAN_WEB_HOME (read)
# Outputs:   web server output is passed through to stdout
# Returns:   1 if the web server home cannot be entered
#######################################
funStartWeb(){
  echo "start azkaban web!"
  # Abort instead of running the relative start script from the wrong directory.
  cd "${AZKABAN_WEB_HOME}" || return 1
  funReplaceENV
  # `tee` without a file argument only passes the stream through to stdout.
  bash ./bin/internal/internal-start-web.sh 2>&1 | tee
}
#######################################
# Dispatch on the role passed as the first argument (the ENTRYPOINT argument).
# Arguments: $1 - "exec", "web" or "all"
# Outputs:   a usage message on stderr for any other value
# Returns:   1 for an unknown role, so the failure is visible in the exit
#            status instead of ending with status 0
#######################################
main() {
  case "${1:-}" in
    exec)
      funStartExec
      ;;
    web)
      funStartWeb
      ;;
    all)
      # Both start functions block while their server runs, so the executor
      # has to go to the background or the web server would never be started.
      funStartExec &
      funStartWeb
      wait
      ;;
    *)
      echo "args is 'web' or 'exec' or 'all' !! But your input is ${1:-}" >&2
      return 1
      ;;
  esac
}

main "$@"
3、azkaban 部署文件 azkaban-sts.yaml
azkaban-sts.yaml 使用说明:
- 在使用 azkaban-sts.yaml 部署文件前,你必须要修改配置文件数据库相关连接信息
- 可选配置邮件通知 ,我写的是虚假邮件信息,你可以替换成你的真实邮箱信息
- 可选自定义azkaban-web 访问用户
## azkaban 部署文件和 配置文件
## cat azkaban-sts.yaml
---
# ConfigMap holding the executor properties, the web server properties, the
# web login users and the MYSQL_* values consumed through envFrom.
# NOTE(review): passwords are stored in plain text here; consider a Secret.
apiVersion: v1
kind: ConfigMap
metadata:
  name: azkaban-config
  labels:
    app: azkaban
    deploy-exec: exec
    deploy-web: web
  annotations:
    azkaban-exec.properties: Azkaban-exec Azkaban执行器配置文件
    azkaban-web.properties: Azkaban-web Azkaban web管理应用配置文件
    azkaban-users.xml: Azkaban-web Azkaban web管理应用 登录用户 配置文件
data:
  MYSQL_DB: azkaban
  MYSQL_HOST: 192.168.51.133
  MYSQL_PASSWORD: azkaban
  MYSQL_PORT: "3306"
  MYSQL_USER: azkaban
  ### Azkaban executor configuration file
  azkaban-exec.properties: |-
    azkaban.name=Azkaban-k8s
    azkaban.label=Azkaban-saas
    azkaban.color=#FF3601
    azkaban.default.servlet.path=/index
    web.resource.dir=web/
    default.timezone.id=Asia/Shanghai
    user.manager.class=azkaban.user.XmlUserManager
    user.manager.xml.file=conf/azkaban-users.xml
    executor.global.properties=conf/global.properties
    azkaban.project.dir=projects
    velocity.dev.mode=false
    jetty.use.ssl=false
    jetty.maxThreads=25
    jetty.port=8081
    mail.sender=admin@qq.com
    mail.host=smtp.exmail.qq.com
    mail.user=admin@qq.com
    mail.password=passwd20230509
    job.failure.email=
    job.success.email=
    lockdown.create.projects=false
    cache.directory=cache
    jetty.connector.stats=true
    executor.connector.stats=true
    database.type=mysql
    mysql.port=3306
    mysql.host=192.168.51.133
    mysql.database=azkaban
    mysql.user=azkaban
    mysql.password=azkaban
    mysql.numconnections=300
    executor.port=12321
    executor.maxThreads=50
    executor.flow.threads=30
    azkaban.webserver.url=http://localhost:8081
    azkaban.jobtype.plugin.dir=plugins/jobtypes
  ### Azkaban web login users file
  azkaban-users.xml: |-
    <azkaban-users>
    <user groups="azkaban" password="azkaban" roles="admin" username="azkaban"/>
    <user username="admin" password="admin" groups="azkaban" roles="admin"/>
    <user username="admin1" password="admin1" groups="azkaban" roles="admin"/>
    <user password="metrics" roles="metrics" username="metrics"/>
    <role name="admin" permissions="ADMIN"/>
    <role name="metrics" permissions="METRICS"/>
    </azkaban-users>
  ### Azkaban web server configuration file
  ### The mysql.* values must match the executor's: the web server finds
  ### executors through the `executors` table of the shared database.
  azkaban-web.properties: |-
    azkaban.name=Azkaban-k8s
    azkaban.label=Azkaban-saas
    azkaban.color=#FF3601
    azkaban.default.servlet.path=/index
    web.resource.dir=web/
    default.timezone.id=Asia/Shanghai
    user.manager.class=azkaban.user.XmlUserManager
    user.manager.xml.file=conf/users/azkaban-users.xml
    executor.global.properties=conf/global.properties
    azkaban.project.dir=projects
    velocity.dev.mode=false
    jetty.use.ssl=false
    jetty.maxThreads=25
    jetty.port=8081
    mail.sender=admin@qq.com
    mail.host=smtp.exmail.qq.com
    mail.user=admin@qq.com
    mail.password=passwd20230509
    job.failure.email=
    job.success.email=
    lockdown.create.projects=false
    cache.directory=cache
    jetty.connector.stats=true
    executor.connector.stats=true
    database.type=mysql
    mysql.port=3306
    mysql.host=192.168.51.133
    mysql.database=azkaban
    mysql.user=azkaban
    mysql.password=azkaban
    mysql.numconnections=300
    azkaban.use.multiple.executors=true
    azkaban.executorselector.filters=StaticRemainingFlowSize,CpuStatus
    azkaban.executorselector.comparator.NumberOfAssignedFlowComparator=1
    azkaban.executorselector.comparator.Memory=1
    azkaban.executorselector.comparator.LastDispatched=1
    azkaban.executorselector.comparator.CpuUsage=1
---
### Azkaban executor StatefulSet.
### postStart activates the executor; preStop deactivates it again.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: azkaban-exec
spec:
  serviceName: "azkaban-exec"
  selector:
    matchLabels:
      app: azkaban
      deploy: exec
  replicas: 1
  template:
    metadata:
      labels:
        app: azkaban
        deploy: exec
    spec:
      terminationGracePeriodSeconds: 10
      imagePullSecrets:
        - name: default-secret
      containers:
        - name: azkaban
          image: test/azkaban:v3.91.0
          imagePullPolicy: IfNotPresent
          args: ["exec"]
          lifecycle:
            postStart:
              exec:
                command:
                  - /bin/bash
                  - -c
                  # Retry until the executor accepts requests instead of one
                  # fixed sleep. `hostname -A` separates names with spaces,
                  # so only the first name is used in the URL.
                  - |
                    host=$(hostname -A | awk '{print $1}')
                    for _ in $(seq 1 30); do
                      curl -fsS "http://${host}:12321/executor?action=activate" && exit 0
                      sleep 2
                    done
                    exit 1
            preStop:
              exec:
                command:
                  - /bin/bash
                  - -c
                  # The image deliberately ships without a mysql client, so
                  # the executor is deactivated through its own HTTP endpoint
                  # rather than by updating the database directly.
                  - |
                    host=$(hostname -A | awk '{print $1}')
                    curl -fsS "http://${host}:12321/executor?action=deactivate"
          envFrom:
            - configMapRef:
                name: azkaban-config
          resources:
            limits:
              cpu: "1"
              memory: 2Gi
            requests:
              cpu: "1"
              memory: 2Gi
          volumeMounts:
            - name: config
              mountPath: /opt/azkaban/conf/
            - mountPath: /etc/localtime
              name: localtime
              readOnly: true
      volumes:
        - hostPath:
            path: /etc/localtime
            type: ""
          name: localtime
        - name: config
          configMap:
            name: azkaban-config
            items:
              - key: azkaban-exec.properties
                path: azkaban-exec.properties
---
# Headless Service (clusterIP: None) selecting the executor pods on port 12321.
kind: Service
apiVersion: v1
metadata:
  name: azkaban-exec
spec:
  clusterIP: None
  ports:
    - targetPort: 12321
      port: 12321
  selector:
    deploy: exec
    app: azkaban
---
### Azkaban web server Deployment.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: azkaban-web
  labels:
    app: azkaban
    deploy: web
spec:
  replicas: 1
  # The update strategy type belongs under spec.strategy;
  # `type` is not a field of the Deployment spec itself.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: azkaban
      deploy: web
  template:
    metadata:
      labels:
        app: azkaban
        deploy: web
    spec:
      imagePullSecrets:
        - name: default-secret
      containers:
        - name: azkaban
          image: test/azkaban:v3.91.0
          imagePullPolicy: IfNotPresent
          args: ["web"]
          ports:
            - containerPort: 8081
              protocol: TCP
          resources:
            limits:
              cpu: "1"
              memory: 2Gi
            requests:
              cpu: "1"
              memory: 2Gi
          volumeMounts:
            # Web server properties; start-azkaban.sh copies them into place.
            - name: config
              mountPath: /opt/azkaban/conf/
            - name: localtime
              mountPath: /etc/localtime
              readOnly: true
            # Login users file, referenced by user.manager.xml.file.
            - name: user-config
              mountPath: /opt/azkaban/azkaban-web/conf/users/
      volumes:
        - name: config
          configMap:
            name: azkaban-config
            items:
              - key: azkaban-web.properties
                path: azkaban-web.properties
        - name: user-config
          configMap:
            name: azkaban-config
            items:
              - key: azkaban-users.xml
                path: azkaban-users.xml
        - name: localtime
          hostPath:
            path: /etc/localtime
            type: ""
---
# NodePort Service exposing the web server's port 8081 on node port 31081.
kind: Service
apiVersion: v1
metadata:
  name: azkaban-web
spec:
  type: NodePort
  ports:
    - nodePort: 31081
      targetPort: 8081
      port: 8081
  selector:
    deploy: web
    app: azkaban
---
四、azkaban全部内容
这个部分主要的作用是回放我构建azkaban的环境、用到的文件和操作过程,有助于初学者思考。以及最后附加了azkaban 简单的说明文档和 azkaban建库建表sql语句。
[root@test azkaban]# pwd
/root/azkaban/azkaban
[root@test azkaban]#
[root@test azkaban]# ## 构建 azkaban 镜像的 文件
[root@test azkaban]#
[root@test azkaban]# tree
.
├── azkaban-exec-server-3.91.0.tar.gz
├── azkaban-sts.yaml
├── azkaban-web-server-3.91.0.tar.gz
├── create-all-sql-3.91.0.sql
├── Dockerfile
├── README.MD
└── start-azkaban.sh
0 directories, 7 files
[root@test azkaban]#
[root@test azkaban]# ls -l
总用量 119580
-rw-r--r-- 1 root root 64642519 5月 9 04:09 azkaban-exec-server-3.91.0.tar.gz
-rw-r--r-- 1 root root 7501 5月 9 07:46 azkaban-sts.yaml
-rw-r--r-- 1 root root 57765975 5月 9 04:10 azkaban-web-server-3.91.0.tar.gz
-rw-r--r-- 1 root root 14298 5月 9 04:06 create-all-sql-3.91.0.sql
-rw-r--r-- 1 root root 1955 5月 9 07:34 Dockerfile
-rw-r--r-- 1 root root 1355 5月 9 07:38 README.MD
-rw-r--r-- 1 root root 1280 5月 9 07:39 start-azkaban.sh
[root@test azkaban]#
[root@test azkaban]# # Dockerfile 文件内容
[root@test azkaban]#
[root@test azkaban]# cat Dockerfile
# Azkaban 3.91.0 image: one image serves both the executor and the web server;
# the role is chosen by the argument passed to start-azkaban.sh.
# The "AS BASE" stage alias is kept as-is, although no later stage refers to it.
FROM centos/centos:7.9.2009 AS BASE
LABEL "作者"="@tudou" "date"="2023-05-09" "Azkaban version"="3.91.0-191"

# Set the timezone to Asia/Shanghai and install Oracle JDK 8 from the Huawei Cloud mirror.
RUN rm -rf /etc/localtime \
    && ln -s /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
    && yum install -y https://repo.huaweicloud.com/java/jdk/8u202-b08/jdk-8u202-linux-x64.rpm

# Replace the yum repositories with the Tencent Cloud mirror.
RUN rm -rf /etc/yum.repos.d/*.repo \
    && curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.cloud.tencent.com/repo/centos7_base.repo
#   && curl -sS https://downloads.mariadb.com/MariaDB/mariadb_repo_setup | bash
# MariaDB-client / mysql are intentionally NOT installed; keep the line above commented out.

# Install helper tools, then drop the yum cache and metadata.
# telnet is only for network troubleshooting and may be omitted.
RUN set -eux \
    && yum clean all \
    && yum makecache \
    && yum install -y procps telnet \
#   && yum install -y MariaDB-client \
    && yum clean all \
    && rm -rf /var/lib/yum/*

# Copy the build context into the image.
# NOTE: the later "rm -rf /tmp/*" runs in a separate layer, so it hides these
# files but does not shrink the image.
COPY . /tmp/

# Unpack both servers and expose them through version-independent symlinks.
# Adjust the version numbers below to the release you actually build.
RUN mkdir -p /opt/azkaban/conf \
    && tar -xvf /tmp/azkaban-exec-server-3.91.0.tar.gz -C /opt/azkaban \
    && ln -s /opt/azkaban/azkaban-exec-server-3.91.0 /opt/azkaban/azkaban-exec \
    && tar -xvf /tmp/azkaban-web-server-3.91.0.tar.gz -C /opt/azkaban \
    && ln -s /opt/azkaban/azkaban-web-server-3.91.0 /opt/azkaban/azkaban-web \
    && mv /tmp/start-azkaban.sh /usr/bin \
    && chmod a+x /usr/bin/start-azkaban.sh \
    && rm -rf /tmp/*

# Create a dedicated runtime user so Azkaban does not run as root.
# "user:group" is used instead of the deprecated "user.group" chown syntax.
RUN useradd -r azkaban -s /bin/bash \
    && chown -R azkaban:azkaban /opt/azkaban \
    && chown -R azkaban:azkaban /usr/bin/start-azkaban.sh

WORKDIR /opt/azkaban/

# The *_HOME variables must be defined in an earlier ENV instruction than PATH:
# within a single ENV instruction every ${VAR} is expanded with the value it had
# BEFORE that instruction, so defining and using them together left PATH with
# ":/bin:/bin" instead of the Azkaban bin directories.
ENV AZKABAN_EXEC_HOME=/opt/azkaban/azkaban-exec \
    AZKABAN_WEB_HOME=/opt/azkaban/azkaban-web
ENV PATH=${PATH}:${AZKABAN_EXEC_HOME}/bin:${AZKABAN_WEB_HOME}/bin

# Runtime user.
USER azkaban
ENTRYPOINT ["start-azkaban.sh"]
CMD [ "exec" ]
[root@test azkaban]#
[root@test azkaban]# ## 启动文件内容 start-azkaban.sh
[root@test azkaban]#
[root@test azkaban]# cat start-azkaban.sh
#!/bin/bash
#creat by tudou
#date 2023-05-09
#######################################
# Copy the mounted Azkaban property files into each server's conf directory.
# A file that is not present is silently skipped.
# Globals:   AZKABAN_EXEC_HOME (read), AZKABAN_WEB_HOME (read)
# Arguments: $1 - optional directory holding the mounted files
#                 (default: /opt/azkaban/conf)
# Outputs:   one progress line on stdout per file copied
#######################################
funReplaceENV(){
  local conf_dir=${1:-/opt/azkaban/conf}

  if [ -f "${conf_dir}/azkaban-exec.properties" ]; then
    echo "Move exec configfile(azkaban.properties) to ./conf/"
    cp -f "${conf_dir}/azkaban-exec.properties" "${AZKABAN_EXEC_HOME}/conf/azkaban.properties"
  fi
  if [ -f "${conf_dir}/azkaban-web.properties" ]; then
    echo "Move web configfile(azkaban.properties) to ./conf/"
    cp -f "${conf_dir}/azkaban-web.properties" "${AZKABAN_WEB_HOME}/conf/azkaban.properties"
  fi
}
#######################################
# Start the Azkaban executor, activate it, then block until it stops.
# Globals:   AZKABAN_EXEC_HOME (read)
# Outputs:   executor output is passed through to stdout
# Returns:   1 if the executor home cannot be entered, otherwise the exit
#            status of the executor pipeline
#######################################
funStartExec(){
  local pipeline_pid fqdn try

  echo "start azkaban exec!"
  cd "${AZKABAN_EXEC_HOME}" || return 1
  funReplaceENV

  # Run the pipeline in the background. In the foreground it stays open for as
  # long as the executor writes to it, so the activation call below would only
  # be reached after the executor had already stopped.
  bash ./bin/internal/internal-start-executor.sh 2>&1 | tee &
  pipeline_pid=$!

  # `hostname -A` separates names with spaces; keep only the first name so the
  # URL contains no whitespace.
  fqdn=$(hostname -A | awk '{print $1}')

  # The executor needs some time before it accepts requests; retry instead of
  # relying on one fixed delay. Give up early if the pipeline has already died.
  for ((try = 0; try < 30; try++)); do
    kill -0 "${pipeline_pid}" 2>/dev/null || break
    if curl -fsS "http://${fqdn}:12321/executor?action=activate"; then
      break
    fi
    sleep 2
  done

  # Keep the caller (and therefore the container) alive while the executor runs.
  wait "${pipeline_pid}"
}
#######################################
# Start the Azkaban web server and block while its output pipeline is open.
# Globals:   AZKABAN_WEB_HOME (read)
# Outputs:   web server output is passed through to stdout
# Returns:   1 if the web server home cannot be entered
#######################################
funStartWeb(){
  echo "start azkaban web!"
  # Abort instead of running the relative start script from the wrong directory.
  cd "${AZKABAN_WEB_HOME}" || return 1
  funReplaceENV
  # `tee` without a file argument only passes the stream through to stdout.
  bash ./bin/internal/internal-start-web.sh 2>&1 | tee
}
#######################################
# Dispatch on the role passed as the first argument (the ENTRYPOINT argument).
# Arguments: $1 - "exec", "web" or "all"
# Outputs:   a usage message on stderr for any other value
# Returns:   1 for an unknown role, so the failure is visible in the exit
#            status instead of ending with status 0
#######################################
main() {
  case "${1:-}" in
    exec)
      funStartExec
      ;;
    web)
      funStartWeb
      ;;
    all)
      # Both start functions block while their server runs, so the executor
      # has to go to the background or the web server would never be started.
      funStartExec &
      funStartWeb
      wait
      ;;
    *)
      echo "args is 'web' or 'exec' or 'all' !! But your input is ${1:-}" >&2
      return 1
      ;;
  esac
}

main "$@"
[root@test azkaban]#
[root@test azkaban]#
[root@test azkaban]# # azkaban 有状态负载部署文件 azkaban-sts.yaml
[root@test azkaban]#
[root@test azkaban]# cat azkaban-sts.yaml
# ConfigMap holding the executor properties, the web server properties, the
# web login users and the MYSQL_* values consumed through envFrom.
# NOTE(review): passwords are stored in plain text here; consider a Secret.
apiVersion: v1
kind: ConfigMap
metadata:
  name: azkaban-config
  labels:
    app: azkaban
    deploy-exec: exec
    deploy-web: web
  annotations:
    azkaban-exec.properties: Azkaban-exec Azkaban执行器配置文件
    azkaban-web.properties: Azkaban-web Azkaban web管理应用配置文件
    azkaban-users.xml: Azkaban-web Azkaban web管理应用 登录用户 配置文件
data:
  MYSQL_DB: azkaban
  MYSQL_HOST: 192.168.51.133
  MYSQL_PASSWORD: azkaban
  MYSQL_PORT: "3306"
  MYSQL_USER: azkaban
  ### Azkaban executor configuration file
  azkaban-exec.properties: |-
    azkaban.name=Azkaban-k8s
    azkaban.label=Azkaban-saas
    azkaban.color=#FF3601
    azkaban.default.servlet.path=/index
    web.resource.dir=web/
    default.timezone.id=Asia/Shanghai
    user.manager.class=azkaban.user.XmlUserManager
    user.manager.xml.file=conf/azkaban-users.xml
    executor.global.properties=conf/global.properties
    azkaban.project.dir=projects
    velocity.dev.mode=false
    jetty.use.ssl=false
    jetty.maxThreads=25
    jetty.port=8081
    mail.sender=admin@qq.com
    mail.host=smtp.exmail.qq.com
    mail.user=admin@qq.com
    mail.password=passwd20230509
    job.failure.email=
    job.success.email=
    lockdown.create.projects=false
    cache.directory=cache
    jetty.connector.stats=true
    executor.connector.stats=true
    database.type=mysql
    mysql.port=3306
    mysql.host=192.168.51.133
    mysql.database=azkaban
    mysql.user=azkaban
    mysql.password=azkaban
    mysql.numconnections=300
    executor.port=12321
    executor.maxThreads=50
    executor.flow.threads=30
    azkaban.webserver.url=http://localhost:8081
    azkaban.jobtype.plugin.dir=plugins/jobtypes
  ### Azkaban web login users file
  azkaban-users.xml: |-
    <azkaban-users>
    <user groups="azkaban" password="azkaban" roles="admin" username="azkaban"/>
    <user username="admin" password="admin" groups="azkaban" roles="admin"/>
    <user username="admin1" password="admin1" groups="azkaban" roles="admin"/>
    <user password="metrics" roles="metrics" username="metrics"/>
    <role name="admin" permissions="ADMIN"/>
    <role name="metrics" permissions="METRICS"/>
    </azkaban-users>
  ### Azkaban web server configuration file
  ### The mysql.* values must match the executor's: the web server finds
  ### executors through the `executors` table of the shared database.
  azkaban-web.properties: |-
    azkaban.name=Azkaban-k8s
    azkaban.label=Azkaban-saas
    azkaban.color=#FF3601
    azkaban.default.servlet.path=/index
    web.resource.dir=web/
    default.timezone.id=Asia/Shanghai
    user.manager.class=azkaban.user.XmlUserManager
    user.manager.xml.file=conf/users/azkaban-users.xml
    executor.global.properties=conf/global.properties
    azkaban.project.dir=projects
    velocity.dev.mode=false
    jetty.use.ssl=false
    jetty.maxThreads=25
    jetty.port=8081
    mail.sender=admin@qq.com
    mail.host=smtp.exmail.qq.com
    mail.user=admin@qq.com
    mail.password=passwd20230509
    job.failure.email=
    job.success.email=
    lockdown.create.projects=false
    cache.directory=cache
    jetty.connector.stats=true
    executor.connector.stats=true
    database.type=mysql
    mysql.port=3306
    mysql.host=192.168.51.133
    mysql.database=azkaban
    mysql.user=azkaban
    mysql.password=azkaban
    mysql.numconnections=300
    azkaban.use.multiple.executors=true
    azkaban.executorselector.filters=StaticRemainingFlowSize,CpuStatus
    azkaban.executorselector.comparator.NumberOfAssignedFlowComparator=1
    azkaban.executorselector.comparator.Memory=1
    azkaban.executorselector.comparator.LastDispatched=1
    azkaban.executorselector.comparator.CpuUsage=1
---
### Azkaban executor StatefulSet.
### postStart activates the executor; preStop deactivates it again.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: azkaban-exec
spec:
  serviceName: "azkaban-exec"
  selector:
    matchLabels:
      app: azkaban
      deploy: exec
  replicas: 1
  template:
    metadata:
      labels:
        app: azkaban
        deploy: exec
    spec:
      terminationGracePeriodSeconds: 10
      imagePullSecrets:
        - name: default-secret
      containers:
        - name: azkaban
          image: test/azkaban:v3.91.0
          imagePullPolicy: IfNotPresent
          args: ["exec"]
          lifecycle:
            postStart:
              exec:
                command:
                  - /bin/bash
                  - -c
                  # Retry until the executor accepts requests instead of one
                  # fixed sleep. `hostname -A` separates names with spaces,
                  # so only the first name is used in the URL.
                  - |
                    host=$(hostname -A | awk '{print $1}')
                    for _ in $(seq 1 30); do
                      curl -fsS "http://${host}:12321/executor?action=activate" && exit 0
                      sleep 2
                    done
                    exit 1
            preStop:
              exec:
                command:
                  - /bin/bash
                  - -c
                  # The image deliberately ships without a mysql client, so
                  # the executor is deactivated through its own HTTP endpoint
                  # rather than by updating the database directly.
                  - |
                    host=$(hostname -A | awk '{print $1}')
                    curl -fsS "http://${host}:12321/executor?action=deactivate"
          envFrom:
            - configMapRef:
                name: azkaban-config
          resources:
            limits:
              cpu: "1"
              memory: 2Gi
            requests:
              cpu: "1"
              memory: 2Gi
          volumeMounts:
            - name: config
              mountPath: /opt/azkaban/conf/
            - mountPath: /etc/localtime
              name: localtime
              readOnly: true
      volumes:
        - hostPath:
            path: /etc/localtime
            type: ""
          name: localtime
        - name: config
          configMap:
            name: azkaban-config
            items:
              - key: azkaban-exec.properties
                path: azkaban-exec.properties
---
# Headless Service (clusterIP: None) selecting the executor pods on port 12321.
kind: Service
apiVersion: v1
metadata:
  name: azkaban-exec
spec:
  clusterIP: None
  ports:
    - targetPort: 12321
      port: 12321
  selector:
    deploy: exec
    app: azkaban
---
### Azkaban web server Deployment.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: azkaban-web
  labels:
    app: azkaban
    deploy: web
spec:
  replicas: 1
  # The update strategy type belongs under spec.strategy;
  # `type` is not a field of the Deployment spec itself.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: azkaban
      deploy: web
  template:
    metadata:
      labels:
        app: azkaban
        deploy: web
    spec:
      imagePullSecrets:
        - name: default-secret
      containers:
        - name: azkaban
          image: test/azkaban:v3.91.0
          imagePullPolicy: IfNotPresent
          args: ["web"]
          ports:
            - containerPort: 8081
              protocol: TCP
          resources:
            limits:
              cpu: "1"
              memory: 2Gi
            requests:
              cpu: "1"
              memory: 2Gi
          volumeMounts:
            # Web server properties; start-azkaban.sh copies them into place.
            - name: config
              mountPath: /opt/azkaban/conf/
            - name: localtime
              mountPath: /etc/localtime
              readOnly: true
            # Login users file, referenced by user.manager.xml.file.
            - name: user-config
              mountPath: /opt/azkaban/azkaban-web/conf/users/
      volumes:
        - name: config
          configMap:
            name: azkaban-config
            items:
              - key: azkaban-web.properties
                path: azkaban-web.properties
        - name: user-config
          configMap:
            name: azkaban-config
            items:
              - key: azkaban-users.xml
                path: azkaban-users.xml
        - name: localtime
          hostPath:
            path: /etc/localtime
            type: ""
---
# NodePort Service exposing the web server's port 8081 on node port 31081.
kind: Service
apiVersion: v1
metadata:
  name: azkaban-web
spec:
  type: NodePort
  ports:
    - nodePort: 31081
      targetPort: 8081
      port: 8081
  selector:
    deploy: web
    app: azkaban
---
[root@test azkaban]#
[root@test azkaban]#
[root@test azkaban]# ## README.MD 文件内容
[root@test azkaban]#
[root@test azkaban]# cat README.MD
### 建立镜像
使用编译好的azkaban 3.91.0版本建立镜像
# 从 github 下载源码构建,jdk 最低版本 8
# yum install -y https://repo.huaweicloud.com/java/jdk/8u202-b08/jdk-8u202-linux-x64.rpm
# git clone https://github.com/azkaban/azkaban.git
cd dockerfile
# 镜像标签需与 azkaban-sts.yaml 中引用的 test/azkaban:v3.91.0 保持一致
docker build -t test/azkaban:v3.91.0 .
docker push test/azkaban:v3.91.0
### 按需创建namespace
kubectl create ns azkaban
### MYSQL数据库
- 数据库数据初始化
CREATE DATABASE azkaban
mysql -umyuser -pmypassword azkaban < create-all-sql-3.91.0.sql
### 创建 azkaban 应用实例
kubectl -n azkaban apply -f azkaban-sts.yaml
### 登陆访问web ui
kubectl -n azkaban get svc
```
<http://CLUSTER-IP:8081>
<http://localhost:31081>
```
### 其他
- 支持scale命令自动扩展exec节点
kubectl -n azkaban scale statefulset azkaban-exec --replicas=2
- 查看执行器注册状态
mysql -umyuser -pmypassword azkaban -e "select * from azkaban.executors"
+----+-----------------------------------------------------+-------+--------+
| id | host | port | active |
+----+-----------------------------------------------------+-------+--------+
| 9 | azkaban-exec-0.azkaban-exec.azkaban.svc.cluster.local | 12321 | 1 |
| 10 | azkaban-exec-1.azkaban-exec.azkaban.svc.cluster.local | 12321 | 1 |
+----+-----------------------------------------------------+-------+--------+
[root@test azkaban]#
[root@test azkaban]#
[root@test azkaban]# ## azkaban 建表文件 create-all-sql-3.91.0.sql
[root@test azkaban]#
[root@test azkaban]# cat create-all-sql-3.91.0.sql
CREATE TABLE active_executing_flows (
exec_id INT,
update_time BIGINT,
PRIMARY KEY (exec_id)
);
CREATE TABLE active_sla (
exec_id INT NOT NULL,
job_name VARCHAR(128) NOT NULL,
check_time BIGINT NOT NULL,
rule TINYINT NOT NULL,
enc_type TINYINT,
options LONGBLOB NOT NULL,
PRIMARY KEY (exec_id, job_name)
);
CREATE TABLE execution_dependencies(
trigger_instance_id varchar(64),
dep_name varchar(128),
starttime bigint(20) not null,
endtime bigint(20),
dep_status tinyint not null,
cancelleation_cause tinyint not null,
project_id INT not null,
project_version INT not null,
flow_id varchar(128) not null,
flow_version INT not null,
flow_exec_id INT not null,
primary key(trigger_instance_id, dep_name)
);
CREATE INDEX ex_end_time
ON execution_dependencies (endtime);
CREATE TABLE execution_flows (
exec_id INT NOT NULL AUTO_INCREMENT,
project_id INT NOT NULL,
version INT NOT NULL,
flow_id VARCHAR(128) NOT NULL,
status TINYINT,
submit_user VARCHAR(64),
submit_time BIGINT,
update_time BIGINT,
start_time BIGINT,
end_time BIGINT,
enc_type TINYINT,
flow_data LONGBLOB,
executor_id INT DEFAULT NULL,
use_executor INT DEFAULT NULL,
flow_priority TINYINT NOT NULL DEFAULT 5,
PRIMARY KEY (exec_id)
);
CREATE INDEX ex_flows_start_time
ON execution_flows (start_time);
CREATE INDEX ex_flows_end_time
ON execution_flows (end_time);
CREATE INDEX ex_flows_time_range
ON execution_flows (start_time, end_time);
CREATE INDEX ex_flows_flows
ON execution_flows (project_id, flow_id);
CREATE INDEX executor_id
ON execution_flows (executor_id);
CREATE INDEX ex_flows_staus
ON execution_flows (status);
CREATE TABLE execution_jobs (
exec_id INT NOT NULL,
project_id INT NOT NULL,
version INT NOT NULL,
flow_id VARCHAR(128) NOT NULL,
job_id VARCHAR(512) NOT NULL,
attempt INT,
start_time BIGINT,
end_time BIGINT,
status TINYINT,
input_params LONGBLOB,
output_params LONGBLOB,
attachments LONGBLOB,
PRIMARY KEY (exec_id, job_id, flow_id, attempt)
);
CREATE INDEX ex_job_id
ON execution_jobs (project_id, job_id);
-- In table execution_logs, name is the combination of flow_id and job_id
--
-- prefix support and lengths of prefixes (where supported) are storage engine dependent.
-- By default, the index key prefix length limit is 767 bytes for innoDB.
-- from: https://dev.mysql.com/doc/refman/5.7/en/create-index.html
CREATE TABLE execution_logs (
exec_id INT NOT NULL,
name VARCHAR(640),
attempt INT,
enc_type TINYINT,
start_byte INT,
end_byte INT,
log LONGBLOB,
upload_time BIGINT,
PRIMARY KEY (exec_id, name, attempt, start_byte)
);
CREATE INDEX ex_log_attempt
ON execution_logs (exec_id, name, attempt);
CREATE INDEX ex_log_index
ON execution_logs (exec_id, name);
CREATE INDEX ex_log_upload_time
ON execution_logs (upload_time);
CREATE TABLE executor_events (
executor_id INT NOT NULL,
event_type TINYINT NOT NULL,
event_time DATETIME NOT NULL,
username VARCHAR(64),
message VARCHAR(512)
);
CREATE INDEX executor_log
ON executor_events (executor_id, event_time);
CREATE TABLE executors (
id INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
host VARCHAR(64) NOT NULL,
port INT NOT NULL,
active BOOLEAN DEFAULT FALSE,
UNIQUE (host, port)
);
CREATE INDEX executor_connection
ON executors (host, port);
CREATE TABLE project_events (
project_id INT NOT NULL,
event_type TINYINT NOT NULL,
event_time BIGINT NOT NULL,
username VARCHAR(64),
message VARCHAR(512)
);
CREATE INDEX log
ON project_events (project_id, event_time);
CREATE TABLE project_files (
project_id INT NOT NULL,
version INT NOT NULL,
chunk INT,
size INT,
file LONGBLOB,
PRIMARY KEY (project_id, version, chunk)
);
CREATE INDEX file_version
ON project_files (project_id, version);
CREATE TABLE project_flow_files (
project_id INT NOT NULL,
project_version INT NOT NULL,
flow_name VARCHAR(128) NOT NULL,
flow_version INT NOT NULL,
modified_time BIGINT NOT NULL,
flow_file LONGBLOB,
PRIMARY KEY (project_id, project_version, flow_name, flow_version)
);
CREATE TABLE project_flows (
project_id INT NOT NULL,
version INT NOT NULL,
flow_id VARCHAR(128),
modified_time BIGINT NOT NULL,
encoding_type TINYINT,
json MEDIUMBLOB,
PRIMARY KEY (project_id, version, flow_id)
);
CREATE INDEX flow_index
ON project_flows (project_id, version);
CREATE TABLE project_permissions (
project_id VARCHAR(64) NOT NULL,
modified_time BIGINT NOT NULL,
name VARCHAR(64) NOT NULL,
permissions INT NOT NULL,
isGroup BOOLEAN NOT NULL,
PRIMARY KEY (project_id, name, isGroup)
);
CREATE INDEX permission_index
ON project_permissions (project_id);
CREATE TABLE project_properties (
project_id INT NOT NULL,
version INT NOT NULL,
name VARCHAR(255),
modified_time BIGINT NOT NULL,
encoding_type TINYINT,
property BLOB,
PRIMARY KEY (project_id, version, name)
);
CREATE INDEX properties_index
ON project_properties (project_id, version);
CREATE TABLE project_versions (
project_id INT NOT NULL,
version INT NOT NULL,
upload_time BIGINT NOT NULL,
uploader VARCHAR(64) NOT NULL,
file_type VARCHAR(16),
file_name VARCHAR(128),
md5 BINARY(16),
num_chunks INT,
resource_id VARCHAR(512) DEFAULT NULL,
startup_dependencies MEDIUMBLOB DEFAULT NULL,
uploader_ip_addr VARCHAR(50) DEFAULT NULL,
PRIMARY KEY (project_id, version)
);
CREATE INDEX version_index
ON project_versions (project_id);
CREATE TABLE projects (
id INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(64) NOT NULL,
active BOOLEAN,
modified_time BIGINT NOT NULL,
create_time BIGINT NOT NULL,
version INT,
last_modified_by VARCHAR(64) NOT NULL,
description VARCHAR(2048),
enc_type TINYINT,
settings_blob LONGBLOB
);
CREATE INDEX project_name
ON projects (name);
CREATE TABLE properties (
name VARCHAR(64) NOT NULL,
type INT NOT NULL,
modified_time BIGINT NOT NULL,
value VARCHAR(256),
PRIMARY KEY (name, type)
);
-- This file collects all quartz table create statement required for quartz 2.2.1
--
-- We are using Quartz 2.2.1 tables, the original place of which can be found at
-- https://github.com/quartz-scheduler/quartz/blob/quartz-2.2.1/distribution/src/main/assembly/root/docs/dbTables/tables_mysql.sql
DROP TABLE IF EXISTS QRTZ_FIRED_TRIGGERS;
DROP TABLE IF EXISTS QRTZ_PAUSED_TRIGGER_GRPS;
DROP TABLE IF EXISTS QRTZ_SCHEDULER_STATE;
DROP TABLE IF EXISTS QRTZ_LOCKS;
DROP TABLE IF EXISTS QRTZ_SIMPLE_TRIGGERS;
DROP TABLE IF EXISTS QRTZ_SIMPROP_TRIGGERS;
DROP TABLE IF EXISTS QRTZ_CRON_TRIGGERS;
DROP TABLE IF EXISTS QRTZ_BLOB_TRIGGERS;
DROP TABLE IF EXISTS QRTZ_TRIGGERS;
DROP TABLE IF EXISTS QRTZ_JOB_DETAILS;
DROP TABLE IF EXISTS QRTZ_CALENDARS;
CREATE TABLE QRTZ_JOB_DETAILS
(
SCHED_NAME VARCHAR(120) NOT NULL,
JOB_NAME VARCHAR(200) NOT NULL,
JOB_GROUP VARCHAR(200) NOT NULL,
DESCRIPTION VARCHAR(250) NULL,
JOB_CLASS_NAME VARCHAR(250) NOT NULL,
IS_DURABLE VARCHAR(1) NOT NULL,
IS_NONCONCURRENT VARCHAR(1) NOT NULL,
IS_UPDATE_DATA VARCHAR(1) NOT NULL,
REQUESTS_RECOVERY VARCHAR(1) NOT NULL,
JOB_DATA BLOB NULL,
PRIMARY KEY (SCHED_NAME,JOB_NAME,JOB_GROUP)
);
CREATE TABLE QRTZ_TRIGGERS
(
SCHED_NAME VARCHAR(120) NOT NULL,
TRIGGER_NAME VARCHAR(200) NOT NULL,
TRIGGER_GROUP VARCHAR(200) NOT NULL,
JOB_NAME VARCHAR(200) NOT NULL,
JOB_GROUP VARCHAR(200) NOT NULL,
DESCRIPTION VARCHAR(250) NULL,
NEXT_FIRE_TIME BIGINT(13) NULL,
PREV_FIRE_TIME BIGINT(13) NULL,
PRIORITY INTEGER NULL,
TRIGGER_STATE VARCHAR(16) NOT NULL,
TRIGGER_TYPE VARCHAR(8) NOT NULL,
START_TIME BIGINT(13) NOT NULL,
END_TIME BIGINT(13) NULL,
CALENDAR_NAME VARCHAR(200) NULL,
MISFIRE_INSTR SMALLINT(2) NULL,
JOB_DATA BLOB NULL,
PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP),
FOREIGN KEY (SCHED_NAME,JOB_NAME,JOB_GROUP)
REFERENCES QRTZ_JOB_DETAILS(SCHED_NAME,JOB_NAME,JOB_GROUP)
);
CREATE TABLE QRTZ_SIMPLE_TRIGGERS
(
SCHED_NAME VARCHAR(120) NOT NULL,
TRIGGER_NAME VARCHAR(200) NOT NULL,
TRIGGER_GROUP VARCHAR(200) NOT NULL,
REPEAT_COUNT BIGINT(7) NOT NULL,
REPEAT_INTERVAL BIGINT(12) NOT NULL,
TIMES_TRIGGERED BIGINT(10) NOT NULL,
PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP),
FOREIGN KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)
REFERENCES QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)
);
CREATE TABLE QRTZ_CRON_TRIGGERS
(
SCHED_NAME VARCHAR(120) NOT NULL,
TRIGGER_NAME VARCHAR(200) NOT NULL,
TRIGGER_GROUP VARCHAR(200) NOT NULL,
CRON_EXPRESSION VARCHAR(200) NOT NULL,
TIME_ZONE_ID VARCHAR(80),
PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP),
FOREIGN KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)
REFERENCES QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)
);
CREATE TABLE QRTZ_SIMPROP_TRIGGERS
(
SCHED_NAME VARCHAR(120) NOT NULL,
TRIGGER_NAME VARCHAR(200) NOT NULL,
TRIGGER_GROUP VARCHAR(200) NOT NULL,
STR_PROP_1 VARCHAR(512) NULL,
STR_PROP_2 VARCHAR(512) NULL,
STR_PROP_3 VARCHAR(512) NULL,
INT_PROP_1 INT NULL,
INT_PROP_2 INT NULL,
LONG_PROP_1 BIGINT NULL,
LONG_PROP_2 BIGINT NULL,
DEC_PROP_1 NUMERIC(13,4) NULL,
DEC_PROP_2 NUMERIC(13,4) NULL,
BOOL_PROP_1 VARCHAR(1) NULL,
BOOL_PROP_2 VARCHAR(1) NULL,
PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP),
FOREIGN KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)
REFERENCES QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)
);
CREATE TABLE QRTZ_BLOB_TRIGGERS
(
SCHED_NAME VARCHAR(120) NOT NULL,
TRIGGER_NAME VARCHAR(200) NOT NULL,
TRIGGER_GROUP VARCHAR(200) NOT NULL,
BLOB_DATA BLOB NULL,
PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP),
FOREIGN KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)
REFERENCES QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)
);
CREATE TABLE QRTZ_CALENDARS
(
SCHED_NAME VARCHAR(120) NOT NULL,
CALENDAR_NAME VARCHAR(200) NOT NULL,
CALENDAR BLOB NOT NULL,
PRIMARY KEY (SCHED_NAME,CALENDAR_NAME)
);
CREATE TABLE QRTZ_PAUSED_TRIGGER_GRPS
(
SCHED_NAME VARCHAR(120) NOT NULL,
TRIGGER_GROUP VARCHAR(200) NOT NULL,
PRIMARY KEY (SCHED_NAME,TRIGGER_GROUP)
);
CREATE TABLE QRTZ_FIRED_TRIGGERS
(
SCHED_NAME VARCHAR(120) NOT NULL,
ENTRY_ID VARCHAR(95) NOT NULL,
TRIGGER_NAME VARCHAR(200) NOT NULL,
TRIGGER_GROUP VARCHAR(200) NOT NULL,
INSTANCE_NAME VARCHAR(200) NOT NULL,
FIRED_TIME BIGINT(13) NOT NULL,
SCHED_TIME BIGINT(13) NOT NULL,
PRIORITY INTEGER NOT NULL,
STATE VARCHAR(16) NOT NULL,
JOB_NAME VARCHAR(200) NULL,
JOB_GROUP VARCHAR(200) NULL,
IS_NONCONCURRENT VARCHAR(1) NULL,
REQUESTS_RECOVERY VARCHAR(1) NULL,
PRIMARY KEY (SCHED_NAME,ENTRY_ID)
);
CREATE TABLE QRTZ_SCHEDULER_STATE
(
SCHED_NAME VARCHAR(120) NOT NULL,
INSTANCE_NAME VARCHAR(200) NOT NULL,
LAST_CHECKIN_TIME BIGINT(13) NOT NULL,
CHECKIN_INTERVAL BIGINT(13) NOT NULL,
PRIMARY KEY (SCHED_NAME,INSTANCE_NAME)
);
CREATE TABLE QRTZ_LOCKS
(
SCHED_NAME VARCHAR(120) NOT NULL,
LOCK_NAME VARCHAR(40) NOT NULL,
PRIMARY KEY (SCHED_NAME,LOCK_NAME)
);
commit;
CREATE TABLE ramp (
rampId VARCHAR(45) NOT NULL,
rampPolicy VARCHAR(45) NOT NULL,
maxFailureToPause INT NOT NULL DEFAULT 0,
maxFailureToRampDown INT NOT NULL DEFAULT 0,
isPercentageScaleForMaxFailure TINYINT NOT NULL DEFAULT 0,
startTime BIGINT NOT NULL DEFAULT 0,
endTime BIGINT NOT NULL DEFAULT 0,
lastUpdatedTime BIGINT NOT NULL DEFAULT 0,
numOfTrail INT NOT NULL DEFAULT 0,
numOfFailure INT NOT NULL DEFAULT 0,
numOfSuccess INT NOT NULL DEFAULT 0,
numOfIgnored INT NOT NULL DEFAULT 0,
isPaused TINYINT NOT NULL DEFAULT 0,
rampStage TINYINT NOT NULL DEFAULT 0,
isActive TINYINT NOT NULL DEFAULT 0,
PRIMARY KEY (rampId)
);
CREATE INDEX idx_ramp
ON ramp (rampId);
CREATE TABLE ramp_dependency (
dependency VARCHAR(45) NOT NULL,
defaultValue VARCHAR (500),
jobtypes VARCHAR (1000),
PRIMARY KEY (dependency)
);
CREATE INDEX idx_ramp_dependency
ON ramp_dependency(dependency);
CREATE TABLE ramp_exceptional_flow_items (
rampId VARCHAR(45) NOT NULL,
flowId VARCHAR(128) NOT NULL,
treatment VARCHAR(1) NOT NULL,
timestamp BIGINT NULL,
PRIMARY KEY (rampId, flowId)
);
CREATE INDEX idx_ramp_exceptional_flow_items
ON ramp_exceptional_flow_items (rampId, flowId);
CREATE TABLE ramp_exceptional_job_items (
rampId VARCHAR(45) NOT NULL,
flowId VARCHAR(128) NOT NULL,
jobId VARCHAR(128) NOT NULL,
treatment VARCHAR(1) NOT NULL,
timestamp BIGINT NULL,
PRIMARY KEY (rampId, flowId, jobId)
);
CREATE INDEX idx_ramp_exceptional_job_items
ON ramp_exceptional_job_items (rampId, flowId, jobId);
CREATE TABLE ramp_items (
rampId VARCHAR(45) NOT NULL,
dependency VARCHAR(45) NOT NULL,
rampValue VARCHAR (500) NOT NULL,
PRIMARY KEY (rampId, dependency)
);
CREATE INDEX idx_ramp_items
ON ramp_items (rampId, dependency);
CREATE TABLE triggers (
trigger_id INT NOT NULL AUTO_INCREMENT,
trigger_source VARCHAR(128),
modify_time BIGINT NOT NULL,
enc_type TINYINT,
data LONGBLOB,
PRIMARY KEY (trigger_id)
);
CREATE TABLE validated_dependencies (
file_name VARCHAR(128),
file_sha1 CHAR(40),
validation_key CHAR(40),
validation_status INT,
PRIMARY KEY (validation_key, file_name, file_sha1)
);
[root@test azkaban]#
[root@test azkaban]#