AWS EMR集成Dolphinscheduler

在EC2部署Dolphinscheduler,向远程EMR集群提交作业

环境:

EMR 6.7.0集群

EC2 一台作为LDAP服务器

EC2 一台作为海豚安装节点

数据库使用RDS或者自己构建

描述:

在EC2中通过同步EMR yum源并安装相关客户端,以及同步emr中hadoop,spark,flink等配置文件,构建边缘节点

海豚基于此,就可以向远程emr集群正常提交任务

注意:

目前测试3.1.3,3.1.8,3.2.0
3.2.0部分代码修改有bug,普通用户无法获取应用程序包,dev目前已经修复此问题

安装脚本

dp_install.sh

#Load the environment configuration and validate the DolphinScheduler version.
#Reads /home/ec2-user/dolphinscheduler_env.sh, which must export DP_VERSION.
#Exits 1 when the env file is missing or DP_VERSION is unsupported.
getEnvConfig(){
	local envFile=/home/ec2-user/dolphinscheduler_env.sh
	#Fail fast with a clear message instead of cascading "unbound variable"
	#style errors later (the original sourced the file unconditionally).
	if [ ! -f "$envFile" ]; then
		echo "ERROR: env file $envFile not found" >&2
		exit 1
	fi
	source "$envFile"

	#The original had two branches that printed the same message; a single
	#case statement covers all supported versions.
	case "$DP_VERSION" in
		3.1.3|3.1.8|3.2.0)
			echo "海豚版本为$DP_VERSION"
			;;
		*)
			echo "错误:输入值不是3.1.3或3.1.8或3.2.0"
			exit 1
			;;
	esac
}

#Install OpenJDK 8 plus base tooling on every master/worker node over SSH.
#Reads: IPS_ADDRESS (comma-separated host list), SSH_KEY, linuxUser.
#https://blog.csdn.net/renfufei/article/details/52621034
installJdk8IfNotExists() {
	ips_str=$IPS_ADDRESS
	ips_arr=(${ips_str//,/ })
	for nodeIp in ${ips_arr[@]}
	do
		#Only install when the package is missing (the original ran the
		#rpm -q probe but ignored its result and always installed).
		if ! ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo rpm -q java-1.8.0-openjdk-devel &>/dev/null"; then
			echo "INSTALL OPEN JDK8 on $nodeIp"
			ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo yum -y install java-1.8.0-openjdk-devel"
		fi
		echo "MAKE AND EXPORT JAVA ENV VARS on $nodeIp"
		#FIX: write JAVA_HOME/PATH so they expand on the REMOTE host. The
		#original interpolated the LOCAL machine's $JAVA_HOME (usually empty)
		#into the remote profile, and its bare `ssh … "export JAVA_HOME=…"`
		#and `ssh … "source …"` lines were no-ops (each ssh is a new shell).
		#Plain tee (not -a) keeps re-runs from appending duplicate lines.
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "echo 'export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk' | sudo tee /etc/profile.d/java.sh"
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "echo 'export PATH=\$JAVA_HOME/bin:\$PATH' | sudo tee -a /etc/profile.d/java.sh"
		#Base tooling + EMR client packages used later by the edge-node setup.
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo yum -y install vim wget zip unzip expect tree htop iotop nc telnet lrzsz openssl-devel emrfs emr-ddb emr-goodies emr-kinesis emr-s3-select emr-scripts emr-puppet"
	done
}

#Install and start a ZooKeeper 3.5.7 ensemble on every ZOOKEEPER_ADDRESS node.
#Reads: ZOOKEEPER_ADDRESS (comma-separated host list), SSH_KEY, linuxUser.
installZookeeper() {
	ips_str=$ZOOKEEPER_ADDRESS
	ips_arr=(${ips_str//,/ })

	#Build the ensemble section of zoo.cfg in a uniquely-named temp file.
	index=1
	dateNow=$(date +%s)
	cfgTmp=/tmp/zkNode.tmp.$dateNow
	echo "maxClientCnxns=0" >>$cfgTmp
	for nodeIp in ${ips_arr[@]}
	do
	  echo "server.$index=$nodeIp:2888:3888" >>$cfgTmp
	  index=$((index + 1))
	done

	zkZooCfg=$(cat $cfgTmp)
	#-p: don't fail if the directory already exists (re-runs).
	mkdir -p /tmp/zookeeper
	wget https://archive.apache.org/dist/zookeeper/zookeeper-3.5.7/apache-zookeeper-3.5.7-bin.tar.gz -O /tmp/zookeeper/apache-zookeeper-3.5.7-bin.tar.gz

	#Distribute, unpack and configure each node; myid must be unique per node.
	myidIndex=1
	for nodeIp in ${ips_arr[@]}
	do
		scp -o StrictHostKeyChecking=no -i $SSH_KEY -r /tmp/zookeeper/apache-zookeeper-3.5.7-bin.tar.gz $linuxUser@$nodeIp:/tmp &>/dev/null
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo tar -zxvf /tmp/apache-zookeeper-3.5.7-bin.tar.gz -C /tmp/"
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo cp -r /tmp/apache-zookeeper-3.5.7-bin /usr/lib/zookeeper"

		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo cp /usr/lib/zookeeper/conf/zoo_sample.cfg /usr/lib/zookeeper/conf/zoo.cfg"
		#Append the ensemble definition built above.
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo tee -a /usr/lib/zookeeper/conf/zoo.cfg <<EOF
$zkZooCfg
EOF"
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo sed -i \"s|dataDir=/tmp/zookeeper|dataDir=/usr/lib/zookeeper/zkData|g\" /usr/lib/zookeeper/conf/zoo.cfg"

		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo mkdir -p /usr/lib/zookeeper/zkData"
		#tee creates the file, so the original's extra `touch` was redundant.
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo tee /usr/lib/zookeeper/zkData/myid <<EOF
$myidIndex
EOF"
		myidIndex=$((myidIndex + 1))
	done

	#Stop anything already bound to 2181 and wipe stale state before (re)start.
	for nodeIp in ${ips_arr[@]}
	do
		#FIX: the original expanded the netstat pipeline on the LOCAL host
		#(and its nested "/" quotes broke the ssh command string). The \$
		#escapes below make it run remotely, and the kill is skipped when
		#nothing is listening on 2181.
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "pids=\$(sudo netstat -nlp 2>/dev/null | grep :2181 | awk '{print \$7}' | cut -d/ -f1); [ -n \"\$pids\" ] && sudo kill -9 \$pids; true"
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo /usr/lib/zookeeper/bin/zkServer.sh stop"
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo rm -rf /usr/lib/zookeeper/zkData/zookeeper_server.pid /usr/lib/zookeeper/zkData/version-2/ /usr/lib/zookeeper/zkData/log/"
	done

	#Start every node, then report its role (leader/follower).
	for nodeIp in ${ips_arr[@]}
	do
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo /usr/lib/zookeeper/bin/zkServer.sh start"
		sleep 5
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo /usr/lib/zookeeper/bin/zkServer.sh status"
	done

}

#Install the MySQL 5.7 client if the `mysql` binary is not already available.
installMySqlCliIfNotExists() {
    if ! mysql -V &>/dev/null; then
        echo "INSTALL MYSQL CLI CLIENT FOR CONNECTIVITY TESTING"
        echo "MySQL client has not been installed yet, will install right now!"
        #FIX: register the MySQL community repo BEFORE installing the client;
        #the original attempted `yum install mysql-community-client` first,
        #which fails when the repo is not configured yet.
        if [ ! -f /tmp/mysql57-community-release-el7-11.noarch.rpm ]; then
            wget https://dev.mysql.com/get/mysql57-community-release-el7-11.noarch.rpm -P /tmp/
        fi
        sudo rpm -ivh /tmp/mysql57-community-release-el7-11.noarch.rpm
        sudo yum -y install mysql-community-client --nogpgcheck
    fi
}

#Create the DolphinScheduler database and its dedicated MySQL user, then
#grant it full privileges on that database. Exits 1 on failure.
#Reads: MYSQL_HOST, MYSQL_ROOT_USER, MYSQL_ROOT_PASSWORD, MYSQL_DPSCHEDULER_DB.
MySqlCreateDatabaseAndUser() {
    echo "TEST MYSQL Create Database And User"
    installMySqlCliIfNotExists
    #NOTE(review): the root password on the command line is visible in `ps`;
    #consider --defaults-extra-file for production use.
    #Test the command directly instead of the original `$?` check.
    if mysql -h"$MYSQL_HOST" -u"$MYSQL_ROOT_USER" -p"$MYSQL_ROOT_PASSWORD" -e "
	CREATE DATABASE IF NOT EXISTS $MYSQL_DPSCHEDULER_DB DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
	CREATE USER IF NOT EXISTS 'dolphinscheduler'@'%' IDENTIFIED BY 'dolphinscheduler';
	GRANT ALL PRIVILEGES ON $MYSQL_DPSCHEDULER_DB.* TO 'dolphinscheduler'@'%';
	flush privileges;
	" &>/dev/null; then
        echo "Create SUCCESSFUL!!"
    else
        echo "Create FAILED!!"
        exit 1
    fi
}

#Create the "dolphinscheduler" OS user on every master/worker node, grant it
#passwordless sudo, and set up passwordless SSH between all of the nodes.
#Reads: IPS_ADDRESS, MASTER_ADDRESS, SSH_KEY, linuxUser.
createDolphinschedulerUser(){
	ips_str=$IPS_ADDRESS
	ips_arr=(${ips_str//,/ })
	for nodeIp in ${ips_arr[@]}
	do
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo useradd dolphinscheduler"
		#passwd must run under sudo or the password cannot be set
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "echo \"dolphinscheduler\" | sudo passwd --stdin dolphinscheduler"
		#FIX: the original appended "NOPASSWD: NOPASSWD: ALL", which is not
		#valid sudoers syntax.
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo sed -i '\$adolphinscheduler  ALL=(ALL)  NOPASSWD: ALL' /etc/sudoers"
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo sed -i 's/Defaults    requirett/#Defaults    requirett/g' /etc/sudoers"
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo mkdir -p /mnt/module"
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chown dolphinscheduler:dolphinscheduler /mnt/module"

		#Generate a key pair for dolphinscheduler only when none exists yet.
		#-N '' -f … makes ssh-keygen non-interactive (the original prompted
		#for the file path and passphrase on every node).
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo [ ! -f /home/dolphinscheduler/.ssh/id_rsa.pub ] " \
		&& ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo -u dolphinscheduler mkdir -p /home/dolphinscheduler/.ssh" \
		&& ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo -u dolphinscheduler ssh-keygen -t rsa -N '' -f /home/dolphinscheduler/.ssh/id_rsa"

		#Copy the control key to every node except the master itself.
		#FIX: the original compared the literal strings "nodeIp" and
		#'$MASTER_ADDRESS', so this branch always ran.
		if [ "$nodeIp" != "$MASTER_ADDRESS" ]
		then
			ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo [ ! -f /home/$linuxUser/.ssh/id_rsa ] " \
			&& scp -o StrictHostKeyChecking=no -i $SSH_KEY -r /home/$linuxUser/.ssh/id_rsa $linuxUser@$nodeIp:/home/$linuxUser/.ssh/
			ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chmod 400 /home/$linuxUser/.ssh/id_rsa"
		fi

		#Allow password logins so ssh-copy-id below can authenticate.
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo sed -i \"s|PasswordAuthentication no|PasswordAuthentication yes|g\" /etc/ssh/sshd_config"
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo systemctl restart sshd"
	done


	#Pairwise passwordless login between every (node, node) combination.
	for nodeIp in ${ips_arr[@]}
	do
		#expect is needed to feed the password to ssh-copy-id
		ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "[ -z  \`sudo rpm -qa | grep expect\` ] "\
		&& ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo yum -y install expect"


		for sshIp in ${ips_arr[@]}
		do
			ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo -u dolphinscheduler expect <<EOF
              #copy the public key to the target host
              spawn ssh-copy-id dolphinscheduler@$sshIp
              expect {
                      #expect answers the host-key and password prompts
                      \"yes/no\" { send \"yes\\n\";exp_continue }
                      \"password\" { send \"dolphinscheduler\\n\";exp_continue }
                      eof
              }
EOF
"
		done

	done

}

#Download the DolphinScheduler release tarball for $DP_VERSION and unpack it
#to /mnt/module/dolphinscheduler, owned by the dolphinscheduler user.
downloadDolphinscheduler(){
	tarball="/tmp/apache-dolphinscheduler-${DP_VERSION}-bin.tar.gz"
	#FIX: abort on a failed download/extract instead of mv'ing a non-existent
	#directory (the original ignored every error).
	sudo wget "https://archive.apache.org/dist/dolphinscheduler/$DP_VERSION/apache-dolphinscheduler-${DP_VERSION}-bin.tar.gz" -O "$tarball" \
		|| { echo "ERROR: download of DolphinScheduler $DP_VERSION failed" >&2; exit 1; }
	sudo tar -zxvf "$tarball" -C /mnt/module/ \
		|| { echo "ERROR: extraction of $tarball failed" >&2; exit 1; }
	sudo mv "/mnt/module/apache-dolphinscheduler-${DP_VERSION}-bin" /mnt/module/dolphinscheduler
	sudo chown dolphinscheduler:dolphinscheduler /mnt/module/dolphinscheduler/ -R
	#psmisc provides fuser/killall, used by the DS start/stop scripts
	sudo yum install -y psmisc
}


#Turn every worker node into an EMR "edge node": sync the EMR yum repos and
#the hadoop/spark/flink/hive/tez configs from the EMR master node, and install
#the matching client packages so jobs can be submitted to the remote cluster.
#Reads: WORKERS_ADDRESS, masterNode, SSH_KEY, linuxUser.
syncMasterFlinkAndYarnConf2(){
  #The cluster key was already distributed in createDolphinschedulerUser.
  ips_str=$WORKERS_ADDRESS
  ips_arr=(${ips_str//,/ })

  #Script that creates the hadoop user/group and installs the cluster SSH key;
  #generated once here since it does not depend on the target node.
  #FIX: the original heredoc escaped "$?" as "\$\?", which left a literal
  #backslash in the generated script and broke the idempotency checks; the
  #`if ! … grep -q` form avoids $? entirely.
  tee /tmp/createUser.sh <<EOF
makeUser() {
  # add group if not exists
  user="hadoop"
  group="hadoop"

  if ! sudo egrep -q "^\$group:" /etc/group; then
    sudo groupadd "\$group"
    echo "Group: \$group is added."
  fi

  # add user if not exists
  if ! sudo egrep -q "^\$user:" /etc/passwd; then
    sudo useradd -g "\$group" "\$user"
    echo "User: \$user is added."
  fi
  # allow the hadoop user to sudo without a password
  echo "\$user ALL = (ALL) NOPASSWD: ALL" | sudo tee /etc/sudoers.d/hadoop
}

makeHadoopUser() {
  makeUser
  sudo mkdir -p /home/"\$user"/.ssh
  sudo chown "\$user":"\$group" /home/"\$user"/.ssh
  sudo chmod 700 /home/"\$user"/.ssh
  # FIX: the original named this "id_isa"; ssh only picks up id_rsa by default
  sudo cp "$SSH_KEY" /home/"\$user"/.ssh/id_rsa
  sudo chown "\$user":"\$group" /home/"\$user"/.ssh/id_rsa
  sudo chmod 600 /home/"\$user"/.ssh/id_rsa
}

makeHadoopUser

EOF

  for nodeIp in ${ips_arr[@]}
  do
    #Sync the EMR yum repos and repo public key from the master node.
	#FIX: create /var/aws/emr BEFORE copying the key, and copy it straight to
	#its final path (the original scp'd into the cwd first).
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo scp -o StrictHostKeyChecking=no -i $SSH_KEY hadoop@$masterNode:/etc/yum.repos.d/*.repo /etc/yum.repos.d/"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo mkdir -p /var/aws/emr/"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo scp -o StrictHostKeyChecking=no -i $SSH_KEY hadoop@$masterNode:/var/aws/emr/repoPublicKey.txt /var/aws/emr/repoPublicKey.txt"

	#Scratch directories for S3 staging and a relocated /tmp.
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo mkdir -p /mnt/s3"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chmod 777 -R /mnt/s3"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo mkdir -p /mnt/tmp"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chmod 777 -R /mnt/tmp"

	#Move /tmp onto the larger /mnt volume. Guarded so a re-run (when /tmp is
	#already a symlink) does not destroy it.
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "[ -L /tmp ] || { sudo mv /tmp /tmp-backup && sudo ln -s /mnt/tmp /tmp && sudo mv /tmp-backup/* /tmp; }"

	#Create the hadoop user and install hadoop clients.
	scp -o StrictHostKeyChecking=no -i $SSH_KEY -r /tmp/createUser.sh $linuxUser@$nodeIp:/tmp/
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chmod +x /tmp/createUser.sh && bash /tmp/createUser.sh"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo yum -y install hadoop-client hadoop-lzo"

	#Sync the hadoop configuration from the master.
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo rsync -avz --delete -e \"ssh -o StrictHostKeyChecking=no -o ServerAliveInterval=10 -i $SSH_KEY \" hadoop@$masterNode:'/etc/hadoop/conf/*' /etc/hadoop/conf "

	#Install flink and sync its configuration.
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo yum install -y flink"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo mkdir -p /etc/flink"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo rsync -avz --delete -e \"ssh -o StrictHostKeyChecking=no -o ServerAliveInterval=10 -i $SSH_KEY \" hadoop@$masterNode:'/etc/flink/*' /etc/flink/ "
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo mkdir -p /var/lib/flink/yarn/"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chmod 777 /var/lib/flink/yarn"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo mkdir -p /usr/lib/flink/log"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chown flink:flink /usr/lib/flink/log -R"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chmod 777 /usr/lib/flink/log -R"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo mkdir -p /var/log/flink-cli"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chown flink:flink /var/log/flink-cli -R"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chmod 777 /var/log/flink-cli -R"

	#Install spark and sync its configuration.
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo yum -y install spark-core spark-python spark-datanucleus"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo rsync -avz --delete -e \"ssh -o StrictHostKeyChecking=no -o ServerAliveInterval=10 -i $SSH_KEY \" hadoop@$masterNode:'/etc/spark/conf/* ' /etc/spark/conf"
	#The edge node has no timeline server; disable it for spark submissions.
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "echo \"spark.hadoop.yarn.timeline-service.enabled false\" | sudo tee -a /etc/spark/conf/spark-defaults.conf"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo mkdir -p /var/log/spark/user"
	#FIX: the original chowned /usr/log/spark/user (typo) instead of /var/log.
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chown spark:spark /var/log/spark/user -R"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chmod 777 -R /var/log/spark/user"

	#Install sqoop plus the MySQL JDBC driver it needs.
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo yum -y install sqoop"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo wget https://repo.maven.apache.org/maven2/mysql/mysql-connector-java/5.1.47/mysql-connector-java-5.1.47.jar -O /usr/lib/sqoop/lib/mysql-connector-java-5.1.47.jar"

	#Install the S3 tooling (emrfs) and sync its configuration.
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo yum -y install emrfs s3-dist-cp"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo rsync -avz --delete -e \"ssh -o StrictHostKeyChecking=no -o ServerAliveInterval=10 -i $SSH_KEY \" hadoop@$masterNode:'/usr/share/aws/emr/emrfs/conf/*' /usr/share/aws/emr/emrfs/conf/  "

	#Install hive/tez and sync their configurations.
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo yum -y install tez hive hive-hcatalog"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo rsync -avz --delete -e \"ssh -o StrictHostKeyChecking=no -o ServerAliveInterval=10 -i $SSH_KEY \" hadoop@$masterNode:'/etc/hive/conf/*' /etc/hive/conf"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo rsync -avz --delete -e \"ssh -o StrictHostKeyChecking=no -o ServerAliveInterval=10 -i $SSH_KEY \" hadoop@$masterNode:'/etc/hive-hcatalog/conf/*' /etc/hive-hcatalog/conf"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo rsync -avz --delete -e \"ssh -o StrictHostKeyChecking=no -o ServerAliveInterval=10 -i $SSH_KEY \" hadoop@$masterNode:'/etc/tez/conf/*' /etc/tez/conf"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo mkdir -p /var/log/hive/user"
	ssh -o StrictHostKeyChecking=no -i $SSH_KEY -T $linuxUser@$nodeIp "sudo chmod 777 -R /var/log/hive/user"

  done
}


#Print a centered banner like "==== title ====" sized to the terminal width.
#NOTE(review): this helper is never called anywhere in this script.
printHeading() {
  local title="$1"
  local cols
  #FIX: tput fails when there is no TTY/TERM (e.g. run via cron or a pipe);
  #fall back to an 80-column layout instead of an arithmetic error.
  cols=$(tput cols 2>/dev/null) || cols=80
  [ -n "$cols" ] || cols=80
  local paddingWidth=$(( (cols - ${#title}) / 2 - 3 ))
  #Guard against titles wider than the terminal (negative field widths).
  [ "$paddingWidth" -gt 0 ] || paddingWidth=1
  printf "\n%${paddingWidth}s" | tr ' ' '='
  printf "  %s  " "$title"
  printf "%${paddingWidth}s\n\n" | tr ' ' '='
}

#Configure DolphinScheduler (install_env.sh, dolphinscheduler_env.sh,
#common.properties, application.yaml), install the MySQL JDBC driver,
#initialize the database schema and start all services.
installDolphinscheduler(){
	#Build the ZooKeeper connect string: host1:2181,host2:2181,...
	zkURL=()
	ips_str=$ZOOKEEPER_ADDRESS
	ips_arr=(${ips_str//,/ })
	for nodeIp in ${ips_arr[@]}
	do
		zkURL+=("$nodeIp:2181")
	done
	zkURLStr=$(IFS=,; echo "${zkURL[*]}")


	#Patch bin/env/install_env.sh: node lists and install path. Order matters:
	#the full "ds1,...,ds5" patterns must be replaced before the shorter ones.
	sudo -u dolphinscheduler sed -i "s|ds1,ds2,ds3,ds4,ds5|$IPS_ADDRESS|g" /mnt/module/dolphinscheduler/bin/env/install_env.sh
	sudo -u dolphinscheduler sed -i "s|ds1,ds2|$MASTER_ADDRESS|g" /mnt/module/dolphinscheduler/bin/env/install_env.sh
	sudo -u dolphinscheduler sed -i "s|ds1:default,ds2:default,ds3:default,ds4:default,ds5:default|$WORKERS_ADDRESS_DEFAULT|g" /mnt/module/dolphinscheduler/bin/env/install_env.sh
	sudo -u dolphinscheduler sed -i "s|ds3|$ALERTSERVERS_ADDRESS|g" /mnt/module/dolphinscheduler/bin/env/install_env.sh
	sudo -u dolphinscheduler sed -i "s|ds1|$APISERVERS_ADDRESS|g" /mnt/module/dolphinscheduler/bin/env/install_env.sh
	sudo -u dolphinscheduler sed -i "s|/tmp/dolphinscheduler|~/dolphinscheduler|g" /mnt/module/dolphinscheduler/bin/env/install_env.sh
	#(the original also ran "s|dolphinscheduler|dolphinscheduler|g" — a no-op, removed)


	#Patch bin/env/dolphinscheduler_env.sh per version.
	if [ "$DP_VERSION" = "3.1.3" ] || [ "$DP_VERSION" = "3.1.8" ]; then
		echo "海豚版本为$DP_VERSION"
		sudo -u dolphinscheduler sed -i "s|/opt/java/openjdk|$JAVA_HOME|g" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i "s|-postgresql|-mysql|g" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i "s|SPRING_DATASOURCE_URL|SPRING_DATASOURCE_URL=jdbc:mysql://$MYSQL_HOST:3306/$MYSQL_DPSCHEDULER_DB|g" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i "s|SPRING_DATASOURCE_USERNAME|SPRING_DATASOURCE_USERNAME=dolphinscheduler|g" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i "s|SPRING_DATASOURCE_PASSWORD|SPRING_DATASOURCE_PASSWORD=dolphinscheduler|g" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh

		sudo -u dolphinscheduler sed -i "s|localhost:2181|$zkURLStr|g" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh

		#Point all component homes at the EMR client install locations.
		sudo -u dolphinscheduler sed -i "s|/opt/soft/hadoop|/usr/lib/hadoop|g" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i "s|/opt/soft/hadoop/etc/hadoop|/usr/lib/hadoop/etc/hadoop|g" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i "s|/opt/soft/spark1|/usr/lib/spark|g" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i "s|/opt/soft/python|/usr/bin/python3.7|g" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i "s|/opt/soft/hive|/usr/lib/hive|g" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i "s|/opt/soft/flink|/usr/lib/flink|g" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '/^export HIVE_HOME=/a export SQOOP_HOME=${SQOOP_HOME:-/usr/lib/sqoop}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i 's|export PATH=|export PATH=$SQOOP_HOME/bin:|' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
	elif [ "$DP_VERSION" = "3.2.0" ]; then
		echo "海豚版本为$DP_VERSION"
		#3.2.0 ships a mostly-empty env file; append the full configuration.
		sudo -u dolphinscheduler sed -i '$a\export JAVA_HOME=${JAVA_HOME:-}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export DATABASE=${DATABASE:-mysql}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export SPRING_PROFILES_ACTIVE=${DATABASE}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i "\$a\\export SPRING_DATASOURCE_URL=jdbc:mysql://$MYSQL_HOST:3306/$MYSQL_DPSCHEDULER_DB" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export SPRING_DATASOURCE_USERNAME=dolphinscheduler' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export SPRING_DATASOURCE_PASSWORD=dolphinscheduler' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export SPRING_CACHE_TYPE=${SPRING_CACHE_TYPE:-none}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export SPRING_JACKSON_TIME_ZONE=${SPRING_JACKSON_TIME_ZONE:-UTC}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export MASTER_FETCH_COMMAND_NUM=${MASTER_FETCH_COMMAND_NUM:-10}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export REGISTRY_TYPE=${REGISTRY_TYPE:-zookeeper}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		#FIX: the original used single quotes here, so the env file ended up
		#with the LITERAL text "$zkURLStr:2181" instead of the real connect
		#string; zkURLStr already carries :2181 per host, so no extra suffix.
		sudo -u dolphinscheduler sed -i "\$a\\export REGISTRY_ZOOKEEPER_CONNECT_STRING=\${REGISTRY_ZOOKEEPER_CONNECT_STRING:-$zkURLStr}" /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/usr/lib/hadoop/etc/hadoop}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export SPARK_HOME1=${SPARK_HOME1:-/usr/lib/spark}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export SPARK_HOME2=${SPARK_HOME2:-/opt/soft/spark2}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export PYTHON_HOME=${PYTHON_HOME:-/usr/bin/python3.7}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export HIVE_HOME=${HIVE_HOME:-/usr/lib/hive}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export SQOOP_HOME=${SQOOP_HOME:-/usr/lib/sqoop}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export FLINK_HOME=${FLINK_HOME:-/usr/lib/flink}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export SEATUNNEL_HOME=${SEATUNNEL_HOME:-/opt/soft/seatunnel}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun}' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
		sudo -u dolphinscheduler sed -i '$a\export PATH=$SQOOP_HOME/bin:$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$SEATUNNEL_HOME/bin:$CHUNJUN_HOME/bin:$PATH' /mnt/module/dolphinscheduler/bin/env/dolphinscheduler_env.sh
	else
		echo "错误:输入值不是3.1.3或3.1.8或3.2.0"
	    exit 1
    fi


	#Install the MySQL JDBC driver into every service's libs directory.
	sudo -u dolphinscheduler mkdir -p /mnt/module/jars
	sudo chown dolphinscheduler:dolphinscheduler /mnt/module/jars/
	sudo -u dolphinscheduler wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.28/mysql-connector-java-8.0.28.jar -O /mnt/module/jars/mysql-connector-java-8.0.28.jar

	for svc in tools api-server alert-server master-server worker-server
	do
		sudo -u dolphinscheduler cp /mnt/module/jars/mysql-connector-java-8.0.28.jar /mnt/module/dolphinscheduler/$svc/libs/
	done


	#Patch common.properties of api-server and worker-server for the S3
	#resource center (same edits on both files, hence the loop).
	for svc in api-server worker-server
	do
		confFile=/mnt/module/dolphinscheduler/$svc/conf/common.properties
		if [ "$DP_VERSION" = "3.2.0" ]; then
			#3.2.0 ships with LOCAL as the default storage type
			sudo sed -i "s|resource.storage.type=LOCAL|resource.storage.type=${STORAGE_TYPE}|g" "$confFile"
		else
			#3.1.x ships with NONE as the default storage type
			sudo sed -i "s|resource.storage.type=NONE|resource.storage.type=${STORAGE_TYPE}|g" "$confFile"
		fi
		sudo sed -i "s|resource.hdfs.fs.defaultFS=hdfs://mycluster:8020|resource.hdfs.fs.defaultFS=${FS_DEFAULT_FS}|g" "$confFile"
		sudo sed -i "s|resource.aws.access.key.id=minioadmin|resource.aws.access.key.id=${ACCESS_KEY_ID}|g" "$confFile"
		sudo sed -i "s|resource.aws.secret.access.key=minioadmin|resource.aws.secret.access.key=${ACCESS_KEY}|g" "$confFile"
		sudo sed -i "s|resource.aws.region=cn-north-1|resource.aws.region=${AWS_REGION}|g" "$confFile"
		sudo sed -i "s|resource.aws.s3.bucket.name=dolphinscheduler|resource.aws.s3.bucket.name=${BUCKET_NAME}|g" "$confFile"
		sudo sed -i "s|resource.aws.s3.endpoint=http://localhost:9000|resource.aws.s3.endpoint=${S3_ENDPOINT}|g" "$confFile"
	done


	#Enable LDAP authentication in the api-server.
	sudo sed -i "s/type: PASSWORD/type: ${SECURITY_TYPE}/" /mnt/module/dolphinscheduler/api-server/conf/application.yaml
	sudo sed -i "s/urls: ldap:\/\/ldap\.forumsys\.com:389\//urls: ldap:\/\/${SECURITY_URLS}\//" /mnt/module/dolphinscheduler/api-server/conf/application.yaml
	sudo sed -i "s/base-dn: dc=example,dc=com/base-dn: ${SECURITY_BASE_DN}/" /mnt/module/dolphinscheduler/api-server/conf/application.yaml
	sudo sed -i "s/username: cn=read-only-admin,dc=example,dc=com/username: ${SECURITY_USERNAME}/" /mnt/module/dolphinscheduler/api-server/conf/application.yaml
	sudo sed -i "s/password: password/password: ${SECURITY_PASSWORD}/" /mnt/module/dolphinscheduler/api-server/conf/application.yaml
	sudo sed -i "s/admin: read-only-admin/admin: ${SECURITY_ADMIN}/" /mnt/module/dolphinscheduler/api-server/conf/application.yaml
	sudo sed -i "s/identity-attribute: uid/identity-attribute: ${SECURITY_ATTRIBUTE}/" /mnt/module/dolphinscheduler/api-server/conf/application.yaml
	sudo sed -i "s/email-attribute: mail/email-attribute: ${SECURITY_EMAIL}/" /mnt/module/dolphinscheduler/api-server/conf/application.yaml
	sudo sed -i "s/not-exist-action: CREATE/not-exist-action: ${SECURITY_NO_EXIST_ACTION}/" /mnt/module/dolphinscheduler/api-server/conf/application.yaml

	#Initialize the database schema.
	sudo -u dolphinscheduler bash /mnt/module/dolphinscheduler/tools/bin/upgrade-schema.sh
	#Deploy and start all DolphinScheduler services.
	sudo -u dolphinscheduler bash /mnt/module/dolphinscheduler/bin/install.sh

	echo "web ui地址:http://xxxxx:12345/dolphinscheduler/ui"
	echo "账号密码:admin/dolphinscheduler123"

	echo "hadoop查看:sudo -u hadoop hadoop fs -ls /"
	echo "flink测试(emr master节点查看yarn是否部署flink作业):sudo -u hadoop /usr/lib/flink/bin/flink run -t yarn-per-job /usr/lib/flink/examples/streaming/WordCount.jar"
}


# --- main installation sequence ---------------------------------------------
# Load config first: every later step reads variables exported by the env file.
getEnvConfig
# Prepare all master/worker nodes: JDK 8, base tools, EMR client packages.
installJdk8IfNotExists
# Stand up the ZooKeeper ensemble used as the DS registry.
installZookeeper
# Ensure a local mysql client is available, then create the DS database/user.
installMySqlCliIfNotExists
MySqlCreateDatabaseAndUser
# OS user + passwordless SSH between all DS nodes.
createDolphinschedulerUser
# Fetch the release tarball, then turn workers into EMR edge nodes.
downloadDolphinscheduler
syncMasterFlinkAndYarnConf2
# Configure, initialize the schema and start all services.
installDolphinscheduler

安装准备

1,准备变量相关脚本

dolphinscheduler_env.sh

#DolphinScheduler version: 3.1.3, 3.1.8 or 3.2.0
export DP_VERSION='3.1.8'

#Database (RDS or self-hosted MySQL) connection settings
export MYSQL_HOST=10.0.16.130
export MYSQL_ROOT_USER=root
export MYSQL_ROOT_PASSWORD='xxxxx'
export MYSQL_DPSCHEDULER_DB=dolphinscheduler318

export linuxUser=`whoami`

#Nodes for each DS service (everything on one node here; change these lists
#for a multi-node cluster deployment)
export ZOOKEEPER_ADDRESS=`hostname -f`
export IPS_ADDRESS=`hostname -f`
export MASTER_ADDRESS=`hostname -f`
export WORKERS_ADDRESS=`hostname -f`
export WORKERS_ADDRESS_DEFAULT=`hostname -f`':default'
export ALERTSERVERS_ADDRESS=`hostname -f`
export APISERVERS_ADDRESS=`hostname -f`

#The id_rsa key must be able to reach the EMR cluster and every DS node
export SSH_KEY=/home/$linuxUser/.ssh/id_rsa
#EMR master node IP
export masterNode=10.0.23.20

#Resource center backed by S3
export STORAGE_TYPE='S3'
export FS_DEFAULT_FS='s3a://s3桶名'
export ACCESS_KEY_ID='xxxxxx'
export ACCESS_KEY='xxxxxx'
export AWS_REGION='ap-southeast-2'
export BUCKET_NAME='s3桶名'
# NOTE(review): the hostname is usually lowercase "s3." — DNS is
# case-insensitive so this works, but verify before copying elsewhere
export S3_ENDPOINT='http://S3.ap-southeast-2.amazonaws.com'

#LDAP user authentication
export SECURITY_TYPE='LDAP'
export SECURITY_URLS='10.0.24.186:389'
export SECURITY_BASE_DN='dc=example,dc=com'
export SECURITY_USERNAME='cn=hue,ou=services,dc=example,dc=com'
export SECURITY_PASSWORD='123456'
export SECURITY_ADMIN='huchao'
export SECURITY_ATTRIBUTE='uid'
export SECURITY_EMAIL='cn'
export SECURITY_NO_EXIST_ACTION='CREATE'

2,上传pem文件

mv /home/ec2-user/ec2.pem /home/ec2-user/.ssh/id_rsa 
chmod 400 /home/ec2-user/.ssh/id_rsa 
#修复从Windows上传到Linux时引入的CRLF换行符问题
sed -i 's/\r//' /home/ec2-user/dp_install.sh

开始安装

bash dp_install.sh  #脚本使用了bash数组等特性,必须用bash执行,不能用POSIX sh

测试调度

1,环境准备

创建用户对应的租户,并将用户绑定到租户
hdfs上需要创建对应用户的目录

hadoop fs -mkdir /user/huchao
hadoop fs -chown huchao:huchao /user/huchao

2,部署作业

3.1.3版本:将用户绑定到租户就可以了
3.2.0版本不太一样:需要在部署作业的时候,再勾选租户,否则会使用默认default租户,从而使用dolphinscheduler用户提交任务了

(1)测试flink action
flink_action_deploy

#上传jar包
org.apache.flink.streaming.examples.wordcount.WordCount

flink_action_deploy_work_stream
(2)测试flink shell
flink_shell_deploy

#注意需要添加资源
flink run -t yarn-per-job work_jars/WordCount.jar

flink_shell_deploy_work_stream
(3)测试spark action
spark_action_deploy

#上传jar包
#Spark版本选择spark1
org.apache.spark.examples.SparkPi

spark_action_deploy_work_stream
(4)测试spark shell
spark_shell_deploy

#注意需要添加资源
spark-submit --master yarn --deploy-mode cluster --num-executors 1 --driver-memory 4g --executor-memory 4g --executor-cores 3 --class org.apache.spark.examples.SparkPi work_jars/spark-examples_2.12-3.2.1-amzn-0.jar

spark_shell_deploy_work_stream
(5)准备hive表
CREATE TABLE `test_stu`(
  `name` string COMMENT 'name',
  `age` bigint COMMENT 'age')
ROW FORMAT SERDE
  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';

insert into test_stu values('zhangsan',1),('lisi',2),('wangwu',3);
(6)测试hive action
hive_action_deploy

select name,count(*) as ct from test_stu group by name;

hive_action_deploy_work_stream
(7)测试hive shell
hive_shell_deploy

beeline -u jdbc:hive2://ip-10-0-23-20:10000 -n huchao -e "select name,count(*) as ct from test_stu group by name;"

hive_shell_deploy_work_stream
(8)测试sqoop shell

#注意,这里将数据导入s3,需要确保这台ec2的role具有读写s3的权限

sqoop_shell_deploy

sqoop import \
--connect  jdbc:mysql://10.0.16.130:3306/hive \
--username hive --password hive \
--table DBS \
--target-dir s3://s3桶名/hive-dbs/ \
--fields-terminated-by "\t"  \
--delete-target-dir \
-m 1  \
--hive-drop-import-delims \
--as-textfile

sqoop_shell_deploy_work_stream
(9)测试sqoop action

需要创建数据源 mysql

sqoop_action_deploy

任务名称:sqoop_mysql_to_s3
流向:import
类型:MYSQL
数据源:MYSQL
表名:DBS
类型: HDFS
目标路径: s3://s3桶名/hive-dbs/
压缩类型: gzip
保存格式: text

sqoop_action_deploy_work_stream
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值