CentOS 7.6: Cluster Deployment of DolphinScheduler 3.1.5

Prerequisites (all machines)

Host planning

| Hostname | IP | Role | Services (port) |
| --- | --- | --- | --- |
| ty-m1 | 10.0.1.102 | master | MasterServer(5678), pg15(5432) |
| ty-m2 | 10.0.0.232 | worker | WorkerServer(1234), AlertServer |
| ty-m3 | 10.0.1.203 | worker | WorkerServer(1234), ApiServer(12345) |

Database planning

| Property | Value |
| --- | --- |
| Hostname | ty-m1 |
| IP | 10.0.1.102 |
| Database version | pg15 |
| ${PGDATABASE} | dp |
| ${PGUSER} | dp |
| ${PGPORT} | 5432 |
| ${PGDATA} | /data/pgsql/data |
| ${PGHOME} | /usr/local/pgsql |

User planning

| Username | Privileges |
| --- | --- |
| dp | passwordless sudo |

# Add the user dp
useradd dp
# Set a password for the dp user
passwd dp
# Passwordless sudo
sed -i '$adp  ALL=(ALL)  NOPASSWD: ALL' /etc/sudoers
sed -i 's/Defaults    requiretty/#Defaults    requiretty/g' /etc/sudoers

Note:

  • The task execution service switches to different Linux users via sudo -u {linux-user} to run jobs in a multi-tenant way, so the deployment user must have sudo privileges, and they must be passwordless. If you are just getting started and do not understand this yet, you can safely ignore it for now; a quick check is shown after this list.
  • If the /etc/sudoers file contains a "Defaults requiretty" line, comment it out as well.
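A minimal sanity check for the sudo configuration above (a sketch; assumes the dp user has already been created):

# Run as root: confirm dp can use sudo without a password prompt
su - dp -c 'sudo -n whoami'
# Expected output: root. If this prompts for a password or fails, re-check /etc/sudoers.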

Directory planning

| Directory | Purpose | Owner |
| --- | --- | --- |
| /usr/local/jdk-1.8 | JDK installation directory | root |
| /usr/local/zookeeper | ZooKeeper installation directory | root |
| /data/zookeeper/data | ZooKeeper data directory | root |
| /usr/local/dolphinscheduler-app | DolphinScheduler deployment directory (install.sh target) | dp |
| /usr/local/dolphinscheduler | DolphinScheduler binary package directory | dp |

mkdir -p /data/zookeeper/data
mkdir -p /usr/local/dolphinscheduler-app
chown -R dp:dp /usr/local/dolphinscheduler-app

Configure /etc/hosts

echo '
10.0.1.102 ty-m1
10.0.0.232 ty-m2
10.0.1.203 ty-m3' >> /etc/hosts
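A quick check (a sketch) that the hostnames resolve and the nodes are reachable from each machine:

for h in ty-m1 ty-m2 ty-m3; do ping -c 1 "$h" > /dev/null && echo "$h reachable"; done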

Install the JDK

# We keep everything we download under /opt
cd /opt
# Upload jdk-1.8 (e.g. with rz from the lrzsz package)
rz
cd /usr/local && tar -zxvf /opt/jdk-8u371-linux-x64.tar.gz
mv jdk1.8.0_371 jdk-1.8
# Configure the JAVA_HOME and PATH environment variables
echo 'export JAVA_HOME=/usr/local/jdk-1.8
export PATH=$PATH:$JAVA_HOME/bin
' >> /etc/profile
# Make the environment variables take effect immediately
source /etc/profile
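Confirm the JDK is picked up from the new PATH:

java -version
# Expect output similar to: java version "1.8.0_371"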

Process tree analysis

  • On macOS, install pstree
  • On Fedora/Red Hat/CentOS/Ubuntu/Debian, install psmisc
  • DolphinScheduler itself does not depend on Hadoop, Hive or Spark, but if your tasks do, the corresponding environments must be available
yum -y install psmisc
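Confirm pstree (provided by psmisc) is available:

which pstree && pstree -V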

Configure passwordless SSH

# Passwordless SSH for root
# Generate the key pair
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
# Distribute the public key to all machines
ssh-copy-id -i ~/.ssh/id_rsa.pub -p 22 root@ty-m1
ssh-copy-id -i ~/.ssh/id_rsa.pub -p 22 root@ty-m2
ssh-copy-id -i ~/.ssh/id_rsa.pub -p 22 root@ty-m3
chmod 600 ~/.ssh/authorized_keys

# Switch to the dp user
su - dp
# Generate the key pair
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
# Distribute the public key to all machines
ssh-copy-id -i ~/.ssh/id_rsa.pub -p 22 dp@ty-m1
ssh-copy-id -i ~/.ssh/id_rsa.pub -p 22 dp@ty-m2
ssh-copy-id -i ~/.ssh/id_rsa.pub -p 22 dp@ty-m3
chmod 600 ~/.ssh/authorized_keys

# Test
ssh localhost
ssh ty-m1
ssh ty-m2
ssh ty-m3
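Instead of logging in to each host one by one, a non-interactive loop (a sketch; run it once as root and once as dp) makes it obvious if any host still prompts for a password:

for h in ty-m1 ty-m2 ty-m3; do
  ssh -o BatchMode=yes -o ConnectTimeout=5 "$h" hostname || echo "passwordless ssh to $h is NOT working"
done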

Deploy ZooKeeper

# Switch back to root
exit

cd /opt && wget https://dlcdn.apache.org/zookeeper/zookeeper-3.7.1/apache-zookeeper-3.7.1-bin.tar.gz --no-check-certificate
cd /usr/local/ && tar -zxvf /opt/apache-zookeeper-3.7.1-bin.tar.gz && mv /usr/local/apache-zookeeper-3.7.1-bin /usr/local/zookeeper
echo '
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/data/zookeeper/data
# the port at which the clients will connect
clientPort=12181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1

## Metrics Providers
#
# https://prometheus.io Metrics Exporter
#metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider
#metricsProvider.httpPort=7000
#metricsProvider.exportJvmInfo=true

server.1=ty-m1:12888:13888
server.2=ty-m2:14888:15888
server.3=ty-m3:16888:17888' > /usr/local/zookeeper/conf/zoo.cfg
# server.1=ty-m1:12888:13888
# server.2=ty-m2:14888:15888
# server.3=ty-m3:16888:17888
# Write 1, 2 and 3 into the myid file under the ZooKeeper data directory on the corresponding machine
# ty-m1
echo '1' > /data/zookeeper/data/myid
# ty-m2
echo '2' > /data/zookeeper/data/myid
# ty-m3
echo '3' > /data/zookeeper/data/myid

Start ZooKeeper

# Start (run on each node)
/usr/local/zookeeper/bin/zkServer.sh start
# Check status
/usr/local/zookeeper/bin/zkServer.sh status
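Once ZooKeeper has been started on all three nodes, a loop like the following (a sketch; relies on the passwordless SSH configured earlier) should report one leader and two followers:

for h in ty-m1 ty-m2 ty-m3; do
  echo "== $h =="
  ssh "$h" /usr/local/zookeeper/bin/zkServer.sh status
done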

Download the DolphinScheduler binary package

cd /opt && wget https://archive.apache.org/dist/dolphinscheduler/3.1.5/apache-dolphinscheduler-3.1.5-bin.tar.gz
cd /usr/local/ && tar -zxvf /opt/apache-dolphinscheduler-3.1.5-bin.tar.gz && mv /usr/local/apache-dolphinscheduler-3.1.5-bin /usr/local/dolphinscheduler
# Change ownership to the deployment user
chown -R dp:dp /usr/local/dolphinscheduler

Modify the install_env.sh configuration

echo '#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# ---------------------------------------------------------
# INSTALL MACHINE
# ---------------------------------------------------------
# A comma separated list of machine hostname or IP would be installed DolphinScheduler,
# including master, worker, api, alert. If you want to deploy in pseudo-distributed
# mode, just write a pseudo-distributed hostname
# Example for hostnames: ips="ds1,ds2,ds3,ds4,ds5", Example for IPs: ips="192.168.8.1,192.168.8.2,192.168.8.3,192.168.8.4,192.168.8.5"
ips=${ips:-"ty-m1,ty-m2,ty-m3"}

# Port of SSH protocol, default value is 22. For now we only support same port in all `ips` machine
# modify it if you use different ssh port
sshPort=${sshPort:-"22"}

# A comma separated list of machine hostname or IP would be installed Master server, it
# must be a subset of configuration `ips`.
# Example for hostnames: masters="ds1,ds2", Example for IPs: masters="192.168.8.1,192.168.8.2"
masters=${masters:-"ty-m1"}

# A comma separated list of machine <hostname>:<workerGroup> or <IP>:<workerGroup>.All hostname or IP must be a
# subset of configuration `ips`, And workerGroup have default value as `default`, but we recommend you declare behind the hosts
# Example for hostnames: workers="ds1:default,ds2:default,ds3:default", Example for IPs: workers="192.168.8.1:default,192.168.8.2:default,192.168.8.3:default"
workers=${workers:-"ty-m2:default,ty-m3:default"}

# A comma separated list of machine hostname or IP would be installed Alert server, it
# must be a subset of configuration `ips`.
# Example for hostname: alertServer="ds3", Example for IP: alertServer="192.168.8.3"
alertServer=${alertServer:-"ty-m2"}

# A comma separated list of machine hostname or IP would be installed API server, it
# must be a subset of configuration `ips`.
# Example for hostname: apiServers="ds1", Example for IP: apiServers="192.168.8.1"
apiServers=${apiServers:-"ty-m3"}

# The directory to install DolphinScheduler for all machine we config above. It will automatically be created by `install.sh` script if not exists.
# Do not set this configuration same as the current path (pwd). Do not add quotes to it if you are using a relative path.
installPath=${installPath:-"/usr/local/dolphinscheduler-app"}

# The user to deploy DolphinScheduler for all machine we config above. For now user must create by yourself before running `install.sh`
# script. The user needs to have sudo privileges and permissions to operate hdfs. If hdfs is enabled than the root directory needs
# to be created by this user
deployUser=${deployUser:-"dp"}

# The root of zookeeper, for now DolphinScheduler default registry server is zookeeper.
zkRoot=${zkRoot:-"/dp"}' > /usr/local/dolphinscheduler/bin/env/install_env.sh
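A quick sanity check (a sketch) that the file parses and the values are what you expect; sourcing it only sets shell variables:

bash -c 'source /usr/local/dolphinscheduler/bin/env/install_env.sh
echo "ips=$ips"
echo "masters=$masters  workers=$workers"
echo "alertServer=$alertServer  apiServers=$apiServers"
echo "installPath=$installPath  deployUser=$deployUser"'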

Modify the dolphinscheduler_env.sh configuration file

echo '#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# JAVA_HOME, will use it to start DolphinScheduler server
export JAVA_HOME=${JAVA_HOME:-/usr/local/jdk-1.8}

# Database related configuration, set database type, username and password
export DATABASE=${DATABASE:-postgresql}
export SPRING_PROFILES_ACTIVE=${DATABASE}
export SPRING_DATASOURCE_URL="jdbc:postgresql://10.0.1.102:5432/dp"
export SPRING_DATASOURCE_USERNAME="dp"
export SPRING_DATASOURCE_PASSWORD="000000"

# DolphinScheduler server related configuration
export SPRING_CACHE_TYPE=${SPRING_CACHE_TYPE:-none}
export SPRING_JACKSON_TIME_ZONE=${SPRING_JACKSON_TIME_ZONE:-Asia/Shanghai}
export MASTER_FETCH_COMMAND_NUM=${MASTER_FETCH_COMMAND_NUM:-10}

# Registry center configuration, determines the type and link of the registry center
export REGISTRY_TYPE=${REGISTRY_TYPE:-zookeeper}
export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-localhost:12181}

# Tasks related configurations, need to change the configuration if you use the related tasks.
export HADOOP_HOME=${HADOOP_HOME:-/usr/local/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/usr/local/hadoop/etc/hadoop}
export SPARK_HOME1=${SPARK_HOME1:-/usr/local/spark1}
export SPARK_HOME2=${SPARK_HOME2:-/usr/local/spark2}
export PYTHON_HOME=${PYTHON_HOME:-/usr/local/python}
export HIVE_HOME=${HIVE_HOME:-/usr/local/hive}
export FLINK_HOME=${FLINK_HOME:-/usr/local/flink}
export DATAX_HOME=${DATAX_HOME:-/usr/local/datax}
export SEATUNNEL_HOME=${SEATUNNEL_HOME:-/opt/soft/seatunnel}
export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun}

export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$SEATUNNEL_HOME/bin:$CHUNJUN_HOME/bin:$PATH'> /usr/local/dolphinscheduler/bin/env/dolphinscheduler_env.sh
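The SPRING_DATASOURCE_* values above point at the PostgreSQL instance that is installed in the next section; once it is up, connectivity can be verified from any node (a sketch; assumes a psql client is installed on that node):

PGPASSWORD=000000 psql -h 10.0.1.102 -p 5432 -U dp -d dp -c 'select version();'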

Installation (ty-m1)

Install PostgreSQL 15 (pg15)

See the separate guide: Centos7.6安装postgresql15 (CentOS 7.6: installing PostgreSQL 15)

Configure the dp database

See the separate guide: Centos7.6安装postgresql15——建库 (CentOS 7.6: installing PostgreSQL 15 — creating the database)

Initialize the metadata

su - dp
bash /usr/local/dolphinscheduler/tools/bin/upgrade-schema.sh
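After upgrade-schema.sh completes, the DolphinScheduler tables should exist in the dp database (a sketch; assumes psql is available on ty-m1):

PGPASSWORD=000000 psql -h 10.0.1.102 -p 5432 -U dp -d dp -c "select count(*) from information_schema.tables where table_name like 't_ds_%';"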
  • Optional: table and column comments for part of the DolphinScheduler metadata tables
  • The data source passwords stored by DolphinScheduler appear in plaintext in the database, so column-level access control is applied to that column below
comment on table t_ds_user is '用户表';
comment on column t_ds_user.id is '自增id,用户id';
comment on column t_ds_user.user_name is '用户名称';
comment on column t_ds_user.user_password is '用户密码';
comment on column t_ds_user.user_type is '用户类型:普通用户1  管理员0';
comment on column t_ds_user.email is '邮箱地址';
comment on column t_ds_user.phone is '手机号码';
comment on column t_ds_user.tenant_id is '所属租户id,对应t_ds_tenant.id';
comment on column t_ds_user.create_time is '用户创建时间';
comment on column t_ds_user.update_time is '用户最近更新时间';
comment on column t_ds_user.queue is '所属队列值';
comment on column t_ds_user.state is '状态:启用1  停用0';


comment on table t_ds_session is '实时会话表';
comment on column t_ds_session.id is '会话id';
comment on column t_ds_session.user_id is '用户id,对应t_ds_user.id';
comment on column t_ds_session.ip is '登陆ip';
comment on column t_ds_session.last_login_time is '最后一次登陆时间';

comment on table t_ds_access_token is '令牌管理表';
comment on column t_ds_access_token.id is '令牌id';
comment on column t_ds_access_token.user_id is '用户id,对应t_ds_user.id';
comment on column t_ds_access_token.token is '令牌';
comment on column t_ds_access_token.expire_time is '过期时间';
comment on column t_ds_access_token.create_time is '令牌创建时间';
comment on column t_ds_access_token.update_time is '令牌更新时间';

comment on table t_ds_datasource is '数据源管理表';
comment on column t_ds_datasource.id is '数据源id';
comment on column t_ds_datasource.name is '数据源名称';
comment on column t_ds_datasource.note is '数据源描述(备注)';
comment on column t_ds_datasource.type is '数据源类型:MYSQL 0 POSTGRESQL 1 HIVE/IMPALA 2 SPARK 3 CLICKHOUSE 4 ORACLE 5 SQLSERVER 6 DB2 7 PRESTO 8 REDSHIFT 9 ATHENA 10';
comment on column t_ds_datasource.user_id is '用户id,对应t_ds_user.id';
comment on column t_ds_datasource.connection_params is '连接参数,json格式,里面有明文密码,所以禁止任何其他用户访问该字段';
revoke select on table t_ds_datasource from public;
grant select (id,"name",note,"type",user_id,create_time,update_time) on table t_ds_datasource to public;
comment on column t_ds_datasource.create_time is '数据源创建时间';
comment on column t_ds_datasource.update_time is '数据源最近更新时间';

comment on table t_ds_relation_datasource_user is '数据源授权给用户关系表';
comment on column t_ds_relation_datasource_user.id is '数据源-用户关系id';
comment on column t_ds_relation_datasource_user.user_id is '用户id,对应t_ds_user.id';
comment on column t_ds_relation_datasource_user.datasource_id is '数据源id,对应t_ds_datasource.id';
comment on column t_ds_relation_datasource_user.create_time is '关系创建时间';
comment on column t_ds_relation_datasource_user.update_time is '关系最近一次更新时间';

comment on table t_ds_tenant is '租户表';
comment on column t_ds_tenant.id is '租户id';
comment on column t_ds_tenant.tenant_code is '租户代码,即操作系统用户名';
comment on column t_ds_tenant.description is '租户描述';
comment on column t_ds_tenant.queue_id is '队列id,对应t_ds_queue.id';
comment on column t_ds_tenant.create_time is '租户创建时间';
comment on column t_ds_tenant.update_time is '租户最近更新时间';

comment on table t_ds_queue is '队列表';
comment on column t_ds_queue.id is '队列id';
comment on column t_ds_queue.queue_name is '队列名称';
comment on column t_ds_queue.queue is '队列值';
comment on column t_ds_queue.create_time is '队列创建时间';
comment on column t_ds_queue.update_time is '队列最近更新时间';

comment on table t_ds_project is '项目表';
comment on column t_ds_project.id is '项目id';
comment on column t_ds_project.name is '项目名称';
comment on column t_ds_project.code is '项目编码';
comment on column t_ds_project.description is '项目描述';
comment on column t_ds_project.user_id is '用户id,对应t_ds_user.id';
comment on column t_ds_project.create_time is '项目创建时间';
comment on column t_ds_project.update_time is '项目最近更新时间';

comment on table t_ds_relation_project_user is '项目授权给用户关系表';
comment on column t_ds_relation_project_user.id is '项目-用户关系id';
comment on column t_ds_relation_project_user.user_id is '用户id,对应t_ds_user.id';
comment on column t_ds_relation_project_user.project_id is '项目id,对应t_ds_project.id';
comment on column t_ds_relation_project_user.create_time is '关系创建时间';
comment on column t_ds_relation_project_user.update_time is '关系最近更新时间';

comment on table t_ds_alertgroup is '告警组';
comment on column t_ds_alertgroup.id is '告警组id';
comment on column t_ds_alertgroup.alert_instance_ids is '告警组实例id,以逗号分隔';
comment on column t_ds_alertgroup.create_user_id is '创建该告警组的用户id,对应t_ds_user.id';
comment on column t_ds_alertgroup.group_name is '告警组名称';
comment on column t_ds_alertgroup.description is '告警组描述';
comment on column t_ds_alertgroup.create_time is '告警组创建时间';
comment on column t_ds_alertgroup.update_time is '告警组最近更新时间';

comment on table t_ds_process_definition is '工作流定义';
comment on column t_ds_process_definition.id is '工作流id';
comment on column t_ds_process_definition.code is '工作流编号';
comment on column t_ds_process_definition.name is '工作流名称';
comment on column t_ds_process_definition.version is '工作流版本号';
comment on column t_ds_process_definition.description is '工作流描述';
comment on column t_ds_process_definition.project_code is '工作流所属项目编号';
comment on column t_ds_process_definition.release_state is '版本状态 1上线 0下线';
comment on column t_ds_process_definition.user_id is '版本所属用户id,对应t_ds_user.id';
comment on column t_ds_process_definition.global_params is '全局参数定义';
comment on column t_ds_process_definition.locations is '包含的任务编号和所在工作流中坐标位置';
comment on column t_ds_process_definition.warning_group_id is '告警组id';
comment on column t_ds_process_definition.execution_type is '执行类型';
comment on column t_ds_process_definition.create_time is '工作流创建时间';
comment on column t_ds_process_definition.update_time is '工作流最近更新时间';

comment on table t_ds_schedules is '调度配置表';
comment on column t_ds_schedules.id is '调度id';
comment on column t_ds_schedules.process_definition_code is '工作流编号,对应t_ds_process_definition.code';
comment on column t_ds_schedules.start_time is '调度生效时间';
comment on column t_ds_schedules.end_time is '调度失效时间';
comment on column t_ds_schedules.timezone_id is '时区id';
comment on column t_ds_schedules.crontab is 'crontab表达式';
comment on column t_ds_schedules.failure_strategy is '失败策略';
comment on column t_ds_schedules.user_id is '配置该调度的用户id,对应t_ds_user.id';
comment on column t_ds_schedules.release_state is '调度上线状态 1上线 0下线';
comment on column t_ds_schedules.warning_type is '告警类型';
comment on column t_ds_schedules.warning_group_id is '告警组id';
comment on column t_ds_schedules.process_instance_priority is '流程优先级';
comment on column t_ds_schedules.worker_group is '工作组';
comment on column t_ds_schedules.environment_code is '环境编号';
comment on column t_ds_schedules.create_time is '调度创建时间';
comment on column t_ds_schedules.update_time is '调度最近更新时间';

comment on table t_ds_task_definition is '任务定义表';
comment on column t_ds_task_definition.id is '任务id';
comment on column t_ds_task_definition.code is '任务编号';
comment on column t_ds_task_definition.name is '任务名称';
comment on column t_ds_task_definition.version is '任务版本号';
comment on column t_ds_task_definition.description is '任务描述';
comment on column t_ds_task_definition.project_code is '项目编号,对应t_ds_project.code';
comment on column t_ds_task_definition.user_id is '用户id,对应t_ds_user.id';
comment on column t_ds_task_definition.task_type is '任务类型';
comment on column t_ds_task_definition.task_execute_type is '任务执行类型';
comment on column t_ds_task_definition.task_params is '任务参数';
comment on column t_ds_task_definition.task_priority is '任务优先级';
comment on column t_ds_task_definition.worker_group is '工作组';
comment on column t_ds_task_definition.environment_code is '环境编号';
comment on column t_ds_task_definition.fail_retry_times is '失败重试次数';
comment on column t_ds_task_definition.fail_retry_interval is '失败重试频率';
comment on column t_ds_task_definition.delay_time is '延时执行时间';
comment on column t_ds_task_definition.task_group_id is '任务组id';
comment on column t_ds_task_definition.task_group_priority is '任务组优先级';
comment on column t_ds_task_definition.cpu_quota is 'cpu分配大小';
comment on column t_ds_task_definition.memory_max is '内存最大分配多少';
comment on column t_ds_task_definition.create_time is '任务创建时间';
comment on column t_ds_task_definition.update_time is '任务最近更新时间';

comment on table t_ds_process_task_relation is '工作流-任务关系表';
comment on column t_ds_process_task_relation.id is '关系id';
comment on column t_ds_process_task_relation.name is '关系名称';
comment on column t_ds_process_task_relation.project_code is '项目编号,对应t_ds_project.code';
comment on column t_ds_process_task_relation.process_definition_code is '工作流定义编号,对应t_ds_process_definition.code';
comment on column t_ds_process_task_relation.process_definition_version is '工作流定义版本,对应t_ds_process_definition.version';
comment on column t_ds_process_task_relation.pre_task_code is '前置任务编号,对应t_ds_task_definition.code';
comment on column t_ds_process_task_relation.pre_task_version is '前置任务版本,对应t_ds_task_definition.version';
comment on column t_ds_process_task_relation.post_task_code is '后置任务编号,对应t_ds_task_definition.code';
comment on column t_ds_process_task_relation.post_task_version is '后置任务版本,对应t_ds_task_definition.version';
comment on column t_ds_process_task_relation.condition_type is '条件类型';
comment on column t_ds_process_task_relation.condition_params is '条件参数';
comment on column t_ds_process_task_relation.create_time is '关系创建时间';
comment on column t_ds_process_task_relation.update_time is '关系最近更新时间';

comment on table t_ds_process_instance is '工作流实例表';
comment on column t_ds_process_instance.id is '工作流实例id';
comment on column t_ds_process_instance.name is '工作流实例名称';
comment on column t_ds_process_instance.process_definition_code is '工作流定义编号,对应t_ds_process_definition.code';
comment on column t_ds_process_instance.process_definition_version is '工作流定义版本,对应t_ds_process_definition.version';
comment on column t_ds_process_instance.state is '工作流实例状态';
comment on column t_ds_process_instance.state_history is '工作流实例历史状态';
comment on column t_ds_process_instance.start_time is '工作流实例开始时间';
comment on column t_ds_process_instance.end_time is '工作流实例结束时间';
comment on column t_ds_process_instance.run_times is '工作流实例运行次数';
comment on column t_ds_process_instance.host is '运行主机ip地址';
comment on column t_ds_process_instance.command_type is '命令类型';
comment on column t_ds_process_instance.command_param is '命令参数';
comment on column t_ds_process_instance.task_depend_type is '任务依赖类型';
comment on column t_ds_process_instance.max_try_times is '最多尝试执行次数';
comment on column t_ds_process_instance.failure_strategy is '失败策略';
comment on column t_ds_process_instance.warning_type is '警告类型';
comment on column t_ds_process_instance.warning_group_id is '告警组id';
comment on column t_ds_process_instance.schedule_time is '调度时间';
comment on column t_ds_process_instance.command_start_time is '命令开始执行时间';
comment on column t_ds_process_instance.global_params is '全局参数定义';
comment on column t_ds_process_instance.process_instance_json is '工作流实例json';
comment on column t_ds_process_instance.update_time is '最近一次更新时间';
comment on column t_ds_process_instance.is_sub_process is '是否子工作流';
comment on column t_ds_process_instance.executor_id is '执行器id';
comment on column t_ds_process_instance.history_cmd is '历史命令';
comment on column t_ds_process_instance.dependence_schedule_times is '依赖调度次数';
comment on column t_ds_process_instance.process_instance_priority is '工作流实例优先级';
comment on column t_ds_process_instance.worker_group is '工作组';
comment on column t_ds_process_instance.environment_code is '环境编号';
comment on column t_ds_process_instance.tenant_id is '租户id';
comment on column t_ds_process_instance.restart_time is '重新开始时间';

comment on table t_ds_task_instance is '任务实例表';
comment on column t_ds_task_instance.id is '任务实例id';
comment on column t_ds_task_instance.name is '任务实例名称';
comment on column t_ds_task_instance.task_type is '任务类型';
comment on column t_ds_task_instance.task_execute_type is '任务执行类型';
comment on column t_ds_task_instance.task_code is '任务编号,对应t_ds_task_definition.code';
comment on column t_ds_task_instance.task_definition_version is '任务定义版本,对应t_ds_task_definition.version';
comment on column t_ds_task_instance.process_instance_id is '工作流实例id,对应t_ds_process_instance.id';
comment on column t_ds_task_instance.state is '任务实例状态';
comment on column t_ds_task_instance.submit_time is '提交时间';
comment on column t_ds_task_instance.start_time is '开始时间';
comment on column t_ds_task_instance.end_time is '结束时间';
comment on column t_ds_task_instance.host is '主机ip地址';
comment on column t_ds_task_instance.execute_path is '执行路径';
comment on column t_ds_task_instance.log_path is '日志路径';
comment on column t_ds_task_instance.alert_flag is '告警标志';
comment on column t_ds_task_instance.retry_times is '重试次数';
comment on column t_ds_task_instance.pid is '进程id';
comment on column t_ds_task_instance.app_link is '应用链接';
comment on column t_ds_task_instance.task_params is '任务参数';
comment on column t_ds_task_instance.retry_interval is '重试频率';
comment on column t_ds_task_instance.max_retry_times is '最多重试次数';
comment on column t_ds_task_instance.task_instance_priority is '任务实例优先级';
comment on column t_ds_task_instance.worker_group is '工作组';
comment on column t_ds_task_instance.environment_code is '环境编号';
comment on column t_ds_task_instance.environment_config is '环境配置';
comment on column t_ds_task_instance.executor_id is '执行器id';
comment on column t_ds_task_instance.first_submit_time is '首次提交时间';
comment on column t_ds_task_instance.delay_time is '延迟时间';
comment on column t_ds_task_instance.task_group_id is '任务组id';
comment on column t_ds_task_instance.var_pool is '变量池';
comment on column t_ds_task_instance.cpu_quota is 'cpu分配大小';
comment on column t_ds_task_instance.memory_max is '内存最大分配多少';
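To confirm the revoke/grant on t_ds_datasource above took effect, the remaining column-level privileges can be inspected (a sketch; connection_params should not appear for grantee PUBLIC):

PGPASSWORD=000000 psql -h 10.0.1.102 -p 5432 -U dp -d dp -c "select grantee, column_name, privilege_type from information_schema.column_privileges where table_name = 't_ds_datasource' order by grantee, column_name;"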

Add the Apache Commons CLI jar

  • Without this jar, the following error is thrown:
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/commons/cli/DefaultParser
        at org.apache.zookeeper.cli.DeleteAllCommand.parse(DeleteAllCommand.java:52)
        at org.apache.zookeeper.ZooKeeperMain.processZKCmd(ZooKeeperMain.java:438)
        at org.apache.zookeeper.ZooKeeperMain.processCmd(ZooKeeperMain.java:367)
        at org.apache.zookeeper.ZooKeeperMain.run(ZooKeeperMain.java:350)
        at org.apache.zookeeper.ZooKeeperMain.main(ZooKeeperMain.java:293)
Caused by: java.lang.ClassNotFoundException: org.apache.commons.cli.DefaultParser
        at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
        at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
        ... 5 more

CSDN download: commons-cli-1.5.0.jar

Baidu Netdisk: commons-cli-1.5.0.jar (extraction code: attr)

# Run the following on ty-m1, ty-m2 and ty-m3
cp /opt/commons-cli-1.5.0.jar /usr/local/dolphinscheduler/alert-server/libs/
cp /opt/commons-cli-1.5.0.jar /usr/local/dolphinscheduler/api-server/libs/
cp /opt/commons-cli-1.5.0.jar /usr/local/dolphinscheduler/master-server/libs/
cp /opt/commons-cli-1.5.0.jar /usr/local/dolphinscheduler/worker-server/libs/
cp /opt/commons-cli-1.5.0.jar /usr/local/dolphinscheduler/standalone-server/libs/
cp /opt/commons-cli-1.5.0.jar /usr/local/dolphinscheduler/tools/libs/
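If the jar was only downloaded to /opt on ty-m1, a loop like the following (a sketch; relies on the passwordless root SSH configured earlier) distributes it and copies it into every module's libs directory:

for h in ty-m2 ty-m3; do scp /opt/commons-cli-1.5.0.jar root@$h:/opt/; done
for h in ty-m1 ty-m2 ty-m3; do
  ssh root@$h 'for d in alert-server api-server master-server worker-server standalone-server tools; do
    cp /opt/commons-cli-1.5.0.jar /usr/local/dolphinscheduler/$d/libs/
    chown dp:dp /usr/local/dolphinscheduler/$d/libs/commons-cli-1.5.0.jar
  done'
done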

Install DolphinScheduler (run install.sh as the dp user)

# install.sh distributes DolphinScheduler to installPath (/usr/local/dolphinscheduler-app) on every host listed in `ips` and then starts all services
bash /usr/local/dolphinscheduler/bin/install.sh
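After the script finishes, each node should be running the services from the host-planning table. A quick check (a sketch; the jps names below are what DolphinScheduler 3.1.x typically registers):

for h in ty-m1 ty-m2 ty-m3; do
  echo "== $h =="
  ssh dp@$h /usr/local/jdk-1.8/bin/jps
done
# Expect MasterServer on ty-m1, WorkerServer on ty-m2 and ty-m3,
# AlertServer on ty-m2, and ApiApplicationServer on ty-m3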

Start and stop services

# Stop all services in the cluster with one command
bash /usr/local/dolphinscheduler/bin/stop-all.sh

# Start all services in the cluster with one command
bash /usr/local/dolphinscheduler/bin/start-all.sh

# Start/stop the Master
bash /usr/local/dolphinscheduler/bin/dolphinscheduler-daemon.sh stop master-server
bash /usr/local/dolphinscheduler/bin/dolphinscheduler-daemon.sh start master-server

# Start/stop a Worker
bash /usr/local/dolphinscheduler/bin/dolphinscheduler-daemon.sh start worker-server
bash /usr/local/dolphinscheduler/bin/dolphinscheduler-daemon.sh stop worker-server

# Start/stop the Api server
bash /usr/local/dolphinscheduler/bin/dolphinscheduler-daemon.sh start api-server
bash /usr/local/dolphinscheduler/bin/dolphinscheduler-daemon.sh stop api-server

# Start/stop the Alert server
bash /usr/local/dolphinscheduler/bin/dolphinscheduler-daemon.sh start alert-server
bash /usr/local/dolphinscheduler/bin/dolphinscheduler-daemon.sh stop alert-server

Log in

http://223.242.38.242:12345/dolphinscheduler/ui/

  • Default account / password: admin / dolphinscheduler123
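If the UI does not load, the api-server can be probed directly with the login endpoint and the default credentials (a sketch):

curl -s -X POST "http://ty-m3:12345/dolphinscheduler/login" \
  -d "userName=admin&userPassword=dolphinscheduler123"
# A JSON response with "success" indicates the api-server is up and can reach the database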

DataX task execution errors

[INFO] 2023-04-28 13:49:28.937 +0800 -  -> /tmp/dolphinscheduler/exec/process/root/9372194505888/9372266588832_5/9/11/9_11_node.sh: line 1: /usr/local/python/bin/python2.7: No such file or directory
# Fix
ln -s  /usr/bin/python /usr/local/python/bin/python2.7
[ERROR] 2023-04-26 07:43:15.558 +0000 - Unable to make field private final int java.lang.ProcessImpl.pid accessible: module java.base does not "opens java.lang" to unnamed module @1a814726
java.lang.reflect.InaccessibleObjectException: Unable to make field private final int java.lang.ProcessImpl.pid accessible: module java.base does not "opens java.lang" to unnamed module @1a814726
 at java.base/java.lang.reflect.AccessibleObject.throwInaccessibleObjectException(AccessibleObject.java:387)
 at java.base/java.lang.reflect.AccessibleObject.checkCanSetAccessible(AccessibleObject.java:363)
 at java.base/java.lang.reflect.AccessibleObject.checkCanSetAccessible(AccessibleObject.java:311)
 at java.base/java.lang.reflect.Field.checkCanSetAccessible(Field.java:181)
 at java.base/java.lang.reflect.Field.setAccessible(Field.java:175)
 at org.apache.dolphinscheduler.plugin.task.api.AbstractCommandExecutor.getProcessId(AbstractCommandExecutor.java:432)
 at org.apache.dolphinscheduler.plugin.task.api.AbstractCommandExecutor.run(AbstractCommandExecutor.java:205)
 at org.apache.dolphinscheduler.plugin.task.datax.DataxTask.handle(DataxTask.java:161)
 at org.apache.dolphinscheduler.server.worker.runner.DefaultWorkerDelayTaskExecuteRunnable.executeTask(DefaultWorkerDelayTaskExecuteRunnable.java:49)
 at org.apache.dolphinscheduler.server.worker.runner.WorkerTaskExecuteRunnable.run(WorkerTaskExecuteRunnable.java:174)
 at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:577)
 at com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:131)
 at com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:74)
 at com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:82)
 at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
 at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
 at java.
  • This error is caused by running on too new a JDK. I was using JDK 20; after switching back to JDK 1.8 the task ran normally.
/usr/local/python/bin/python2.7: No such file or directory
  • This error occurs because the file /usr/local/python/bin/python2.7 does not exist
which python
mkdir -p /usr/local/python/bin/
# Create a symlink
ln -s /usr/bin/python /usr/local/python/bin/python2.7
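To verify the symlink resolves to a working interpreter (on CentOS 7 the system /usr/bin/python is Python 2.7):

/usr/local/python/bin/python2.7 --version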

References

开源任务调度平台dolphinscheduler-3.1.3/3.1.4部署及使用指南(未完)

【ZooKeeper】ZooKeeper安装及简单操作

DolphinScheduler official documentation: Version 3.1.5 / Deployment Guide / Cluster Deployment
