027 大数据之Azkaban调度系统

1、Azkaban 3.0 中文文档

大数据平台 —— 调度系统之Azkaban
azkaban-web和azkaban-exec的一键启动和关闭(shell脚本)
解决 Azkaban Executor 启动后不会自动激活
Azkaban报错-azkaban.executor.ExecutorManagerException: No active executors found

azkaban启动和关闭脚本

#!/bin/bash
#
# One-shot start/stop helper for an Azkaban cluster.
#
# Usage: <this-script> start|stop
#
#   start  Start an executor server on every host in EXEC_HOSTS, wait,
#          activate each executor through its HTTP endpoint, then start
#          the web server on the local machine.
#   stop   Stop the local web server, then the executor server on every
#          host in EXEC_HOSTS.
#
# A failure on one host is reported by ssh/curl but does not abort the
# run; the remaining hosts are still processed.

AZKABAN_WEB_PATH=/opt/module/azkaban/azkaban-web-server-3.84.4
AZKABAN_EXEC_PATH=/opt/module/azkaban/azkaban-exec-server-3.84.4
# Hosts that run an executor server.
EXEC_HOSTS=(hadoop102 hadoop103 hadoop104)
# Port the executors listen on; must match executor.port in the
# executor's azkaban.properties.
EXEC_PORT=12321

case "${1:-}" in

"start")
  # Start one executor server per host.
  echo "-----------------------starting exec-------------------------"
  for host in "${EXEC_HOSTS[@]}"
  do
    echo "-----$host-----"
    # The commands are fed to the remote shell on stdin. The here-document
    # body and its delimiter are deliberately written at column 0: with
    # `<<-` only leading TABs are stripped, so a delimiter indented with
    # spaces is never recognised and the rest of the script is swallowed.
    # NOTE(review): stdin mode presumably gives a login shell that loads
    # the environment from /etc/profile.d -- confirm before converting
    # this to `ssh host 'command'`.
    ssh -T "$host" <<EOF
cd "${AZKABAN_EXEC_PATH}" || exit 1
rm -f executorServerLog*
bin/start-exec.sh
exit
EOF
  done

  # Executors are not active right after start-up; activate them.
  echo "---------------------waiting for executors-----------------------"
  # Give the executor servers time to come up before calling them.
  sleep 30s
  echo "---------------------updata mysql columns-----------------------"
  # Alternative kept for reference: flip the flag directly in MySQL.
  #ssh hadoop102 "/usr/bin/mysql -uazkaban -p000000 -e 'update azkaban.executors set active =1'" > /dev/null 2>&1
  for host in "${EXEC_HOSTS[@]}"
  do
    echo "---------------------$host-----------------------"
    # $host and $EXEC_PORT are expanded locally before the text is sent,
    # so each executor is asked to activate itself. Column-0 here-document
    # for the same reason as above.
    ssh -T "$host" <<EOF
curl -G "${host}:${EXEC_PORT}/executor?action=activate" && echo
exit
EOF
  done

  # Start the web server from its install directory; the start script is
  # invoked with a relative path.
  echo "-----------------------starting web-------------------------"
  cd "${AZKABAN_WEB_PATH}" || exit 1
  bin/start-web.sh

;;


"stop")
  echo "--------------------------stoping web-------------------------"
  "${AZKABAN_WEB_PATH}/bin/shutdown-web.sh"
  for host in "${EXEC_HOSTS[@]}"
  do
    echo "-------------------------stoping executor server in $host-----------------------"
    ssh "$host" "${AZKABAN_EXEC_PATH}/bin/shutdown-exec.sh"

  done
;;

*)
  # Missing or unknown sub-command: say so instead of silently doing nothing.
  echo "Usage: $0 start|stop" >&2
  exit 1
;;
esac

2、调度实战(先准备好log数据到hdfs,db数据到mysql)

那些年走过的azkaban的坑
gmall.flow

# Azkaban flow definition: ten command-type jobs wired together by dependsOn.
# Dependency graph as declared below:
#   mysql_to_hdfs   -> hdfs_to_ods_db  -> ods_to_dim_db, ods_to_dwd_db
#   hdfs_to_ods_log -> ods_to_dwd_log
#   ods_to_dim_db + ods_to_dwd_log + ods_to_dwd_db -> dwd_to_dws
#   dwd_to_dws -> dws_to_dwt -> dwt_to_ads -> hdfs_to_mysql
# NOTE(review): ${dt} is not defined in this file; presumably it is supplied
# as a flow parameter at execution time -- confirm.
nodes:
  # Entry job (no dependsOn).
  - name: mysql_to_hdfs
    type: command
    config:
     command: /home/atguigu/bin/mysql_to_hdfs.sh all ${dt}
    
  # Entry job (no dependsOn); unlike the other scripts it takes only ${dt}.
  - name: hdfs_to_ods_log
    type: command
    config:
     command: /home/atguigu/bin/hdfs_to_ods_log.sh ${dt}
     
  # Runs after mysql_to_hdfs.
  - name: hdfs_to_ods_db
    type: command
    dependsOn: 
     - mysql_to_hdfs
    config: 
     command: /home/atguigu/bin/hdfs_to_ods_db.sh all ${dt}
  
  # Runs after hdfs_to_ods_db.
  - name: ods_to_dim_db
    type: command
    dependsOn: 
     - hdfs_to_ods_db
    config: 
     command: /home/atguigu/bin/ods_to_dim_db.sh all ${dt}

  # Runs after hdfs_to_ods_log.
  - name: ods_to_dwd_log
    type: command
    dependsOn: 
     - hdfs_to_ods_log
    config: 
     command: /home/atguigu/bin/ods_to_dwd_log.sh all ${dt}
    
  # Runs after hdfs_to_ods_db.
  - name: ods_to_dwd_db
    type: command
    dependsOn: 
     - hdfs_to_ods_db
    config: 
     command: /home/atguigu/bin/ods_to_dwd_db.sh all ${dt}
    
  # Join point: waits for all three upstream jobs listed in dependsOn.
  - name: dwd_to_dws
    type: command
    dependsOn:
     - ods_to_dim_db
     - ods_to_dwd_log
     - ods_to_dwd_db
    config:
     command: /home/atguigu/bin/dwd_to_dws.sh all ${dt}
    
  # Runs after dwd_to_dws.
  - name: dws_to_dwt
    type: command
    dependsOn:
     - dwd_to_dws
    config:
     command: /home/atguigu/bin/dws_to_dwt.sh all ${dt}
    
  # Runs after dws_to_dwt.
  - name: dwt_to_ads
    type: command
    dependsOn: 
     - dws_to_dwt
    config:
     command: /home/atguigu/bin/dwt_to_ads.sh all ${dt}
     
  # Final job; runs after dwt_to_ads and is the only one not passed ${dt}.
  - name: hdfs_to_mysql
    type: command
    dependsOn:
     - dwt_to_ads
    config:
      command: /home/atguigu/bin/hdfs_to_mysql.sh all

gmall.project

azkaban-flow-version: 2.0

查看各节点的ID
在这里插入图片描述
方案一:指定azkaban执行脚本的节点
在这里插入图片描述
方案二:在Executor所在所有节点部署任务所需脚本和应用。

# 分发脚本、hive、sqoop、spark、my_env.sh
[atguigu@hadoop102 ~]$ xsync /home/atguigu/bin/
[atguigu@hadoop102 ~]$ xsync /opt/module/hive
[atguigu@hadoop102 ~]$ xsync /opt/module/sqoop
[atguigu@hadoop102 ~]$ xsync /opt/module/spark
[atguigu@hadoop102 ~]$ sudo /home/atguigu/bin/xsync /etc/profile.d/my_env.sh

调度执行状态
在这里插入图片描述

PS:Azkaban配置

解压Azkaban部署包

[atguigu@hadoop102 azkaban]$ ll
总用量 12
drwxr-xr-x. 2 atguigu atguigu 4096 4月  18 2020 azkaban-db-3.84.4
drwxr-xr-x. 6 atguigu atguigu 4096 4月  18 2020 azkaban-exec-server-3.84.4
drwxr-xr-x. 6 atguigu atguigu 4096 4月  18 2020 azkaban-web-server-3.84.4

配置MySQL

# 登陆MySQL,创建Azkaban数据库
mysql> create database azkaban;
Query OK, 1 row affected (0.00 sec)
# 创建Azkaban用户,任何主机都可以访问Azkaban,密码是000000
mysql> CREATE USER 'azkaban'@'%' IDENTIFIED BY '000000';
Query OK, 0 rows affected (0.06 sec)
# 赋予Azkaban用户增删改查权限 
mysql> GRANT SELECT,INSERT,UPDATE,DELETE ON azkaban.* to 'azkaban'@'%' WITH GRANT OPTION;
Query OK, 0 rows affected (0.04 sec)
# 创建Azkaban表,完成后退出MySQL
mysql> use azkaban;
Database changed
mysql> source /opt/module/azkaban/azkaban-db-3.84.4/create-all-sql-3.84.4.sql
# 更改MySQL包大小;防止Azkaban连接MySQL阻塞
[atguigu@hadoop102 azkaban]$ cat /etc/my.cnf
# For advice on how to change settings please see
# http://dev.mysql.com/doc/refman/5.7/en/server-configuration-defaults.html

[mysqld]
# 更改MySQL包大小;防止Azkaban连接MySQL阻塞
max_allowed_packet=1024M
#skip-grant-tables
#
# Remove leading # and set to the amount of RAM for the most important data
# cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%.
# innodb_buffer_pool_size = 128M
#
# Remove leading # to turn on a very important data integrity option: logging
# changes to the binary log between backups.
# log_bin
#
# Remove leading # to set options mainly useful for reporting servers.
# The server defaults are faster for transactions and fast SELECTs.
# Adjust sizes as needed, experiment to find the optimal values.
# join_buffer_size = 128M
# sort_buffer_size = 2M
# read_rnd_buffer_size = 2M
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock

# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0

log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
# 重启MySQL
[atguigu@hadoop102 azkaban]$ sudo systemctl restart mysqld

配置Executor Server

# 编辑azkaban.properties
[atguigu@hadoop102 azkaban]$ cat /opt/module/azkaban/azkaban-exec-server-3.84.4/conf/azkaban.properties 
# Azkaban Personalization Settings
azkaban.name=Test
azkaban.label=My Local Azkaban
azkaban.color=#FF3601
azkaban.default.servlet.path=/index
web.resource.dir=web/
# 修改时区
default.timezone.id=Asia/Shanghai
# Azkaban UserManager class
user.manager.class=azkaban.user.XmlUserManager
user.manager.xml.file=conf/azkaban-users.xml
# Loader for projects
executor.global.properties=conf/global.properties
azkaban.project.dir=projects
# Velocity dev mode
velocity.dev.mode=false
# Azkaban Jetty server properties.
jetty.use.ssl=false
jetty.maxThreads=25
jetty.port=8081
# Where the Azkaban web server is located
azkaban.webserver.url=http://hadoop102:8081
# mail settings
mail.sender=
mail.host=
# User facing web server configurations used to construct the user facing server URLs. They are useful when there is a reverse proxy between Azkaban web servers and users.
# enduser -> myazkabanhost:443 -> proxy -> localhost:8081
# when this parameters set then these parameters are used to generate email links.
# if these parameters are not set then jetty.hostname, and jetty.port(if ssl configured jetty.ssl.port) are used.
# azkaban.webserver.external_hostname=myazkabanhost.com
# azkaban.webserver.external_ssl_port=443
# azkaban.webserver.external_port=8081
job.failure.email=
job.success.email=
lockdown.create.projects=false
cache.directory=cache
# JMX stats
jetty.connector.stats=true
executor.connector.stats=true
# Azkaban plugin settings
azkaban.jobtype.plugin.dir=plugins/jobtypes
# Azkaban mysql settings by default. Users should configure their own username and password.
database.type=mysql
mysql.port=3306
# 修改主机
mysql.host=hadoop102
mysql.database=azkaban
mysql.user=azkaban
# 修改密码
mysql.password=000000
mysql.numconnections=100
# Azkaban Executor settings
executor.maxThreads=50
executor.flow.threads=30
# 设置端口
executor.port=12321

# 同步azkaban-exec到所有节点
[atguigu@hadoop102 azkaban]$ xsync /opt/module/azkaban/azkaban-exec-server-3.84.4/

启动executor server

[atguigu@hadoop102 azkaban-exec-server-3.84.4]$ pwd
/opt/module/azkaban/azkaban-exec-server-3.84.4
# 在三台机器上分别启动
[atguigu@hadoop102 azkaban-exec-server-3.84.4]$ bin/start-exec.sh
[atguigu@hadoop103 azkaban-exec-server-3.84.4]$ bin/start-exec.sh
[atguigu@hadoop104 azkaban-exec-server-3.84.4]$ bin/start-exec.sh
# azkaban启动成功标识
[atguigu@hadoop103 azkaban-exec-server-3.84.4]$ ll
-rw-rw-r--. 1 atguigu atguigu     5 5月  14 21:31 executor.port

激活executor

[atguigu@hadoop102 azkaban-exec-server-3.84.4]$ curl -G "hadoop102:12321/executor?action=activate" && echo
{"status":"success"}
[atguigu@hadoop103 azkaban-exec-server-3.84.4]$ curl -G "hadoop103:12321/executor?action=activate" && echo
{"status":"success"}
[atguigu@hadoop104 azkaban-exec-server-3.84.4]$ curl -G "hadoop104:12321/executor?action=activate" && echo
{"status":"success"}

配置Web Server

[atguigu@hadoop102 azkaban-exec-server-3.84.4]$ cat /opt/module/azkaban/azkaban-web-server-3.84.4/conf/azkaban.properties 
# Azkaban Personalization Settings
azkaban.name=Test
azkaban.label=My Local Azkaban
azkaban.color=#FF3601
azkaban.default.servlet.path=/index
web.resource.dir=web/
# 更改时区
default.timezone.id=Asia/Shanghai
# Azkaban UserManager class
user.manager.class=azkaban.user.XmlUserManager
user.manager.xml.file=conf/azkaban-users.xml
# Loader for projects
executor.global.properties=conf/global.properties
azkaban.project.dir=projects
# Velocity dev mode
velocity.dev.mode=false
# Azkaban Jetty server properties.
jetty.use.ssl=false
jetty.maxThreads=25
jetty.port=8081
# Azkaban Executor settings
# mail settings
mail.sender=
mail.host=
# User facing web server configurations used to construct the user facing server URLs. They are useful when there is a reverse proxy between Azkaban web servers and users.
# enduser -> myazkabanhost:443 -> proxy -> localhost:8081
# when this parameters set then these parameters are used to generate email links.
# if these parameters are not set then jetty.hostname, and jetty.port(if ssl configured jetty.ssl.port) are used.
# azkaban.webserver.external_hostname=myazkabanhost.com
# azkaban.webserver.external_ssl_port=443
# azkaban.webserver.external_port=8081
job.failure.email=
job.success.email=
lockdown.create.projects=false
cache.directory=cache
# JMX stats
jetty.connector.stats=true
executor.connector.stats=true
# Azkaban mysql settings by default. Users should configure their own username and password.
database.type=mysql
mysql.port=3306
# 更改主机
mysql.host=hadoop102
mysql.database=azkaban
mysql.user=azkaban
# 更改密码
mysql.password=000000
mysql.numconnections=100
#Multiple Executor
azkaban.use.multiple.executors=true
#StaticRemainingFlowSize:正在排队的任务数;
#CpuStatus:CPU占用情况
#MinimumFreeMemory:内存占用情况。测试环境,必须将MinimumFreeMemory删除掉,否则它会认为集群资源不够,不执行
azkaban.executorselector.filters=StaticRemainingFlowSize,CpuStatus
azkaban.executorselector.comparator.NumberOfAssignedFlowComparator=1
azkaban.executorselector.comparator.Memory=1
azkaban.executorselector.comparator.LastDispatched=1
azkaban.executorselector.comparator.CpuUsage=1
[atguigu@hadoop102 conf]$ cat /opt/module/azkaban/azkaban-web-server-3.84.4/conf/azkaban-users.xml 
<azkaban-users>
  <user groups="azkaban" password="azkaban" roles="admin" username="azkaban"/>
  <user password="metrics" roles="metrics" username="metrics"/>
  <!--添加atguigu用户-->
  <user password="atguigu" roles="metrics,admin" username="atguigu"/>
  <role name="admin" permissions="ADMIN"/>
  <role name="metrics" permissions="METRICS"/>
</azkaban-users>

启动web server

[atguigu@hadoop102 azkaban-web-server-3.84.4]$ pwd
/opt/module/azkaban/azkaban-web-server-3.84.4
[atguigu@hadoop102 azkaban-web-server-3.84.4]$ bin/start-web.sh

使用azkaban-users.xml中配置的用户(例如atguigu,密码atguigu)登录:http://hadoop102:8081

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值