Spark Cluster Installation
1 Upload and extract
1.1 Install Spark 3.1.3 on a Hadoop 2.7.7 environment
[root@hadoop111 software]# tar -zxvf spark-3.1.3-bin-hadoop2.7.tgz -C /opt/module/
1.2 Rename
[root@hadoop111 module]# mv spark-3.1.3-bin-hadoop2.7/ spark-3.1.3
1.3 Delete the .cmd files (optional)
The *.cmd files are Windows launch scripts and are not needed on Linux:
cd /opt/module/spark-3.1.3
rm -rf bin/*.cmd sbin/*.cmd
2 Edit the configuration files
2.1 spark-env.sh
cd /opt/module/spark-3.1.3/conf/
cp spark-env.sh.template spark-env.sh
vim spark-env.sh
export JAVA_HOME=/opt/module/jdk1.8
SPARK_MASTER_HOST=hadoop111
SPARK_MASTER_PORT=7077            # RPC port that workers and applications connect to
SPARK_MASTER_WEBUI_PORT=8080      # master web UI port
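Optionally, the same file can cap the resources each worker offers. SPARK_WORKER_CORES and SPARK_WORKER_MEMORY are standard spark-env.sh variables; the values below are only placeholders to size for your machines:
SPARK_WORKER_CORES=2      # example value: CPU cores each worker offers
SPARK_WORKER_MEMORY=2g    # example value: memory each worker offers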
2.2 spark-defaults.conf
cp spark-defaults.conf.template spark-defaults.conf
vim spark-defaults.conf
spark.master spark://hadoop111:7077
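To also keep event logs of finished applications (e.g. for a history server), Spark's standard spark.eventLog.* properties can go in the same file. The HDFS URL below is an assumption about your NameNode address; adjust it and create the directory first:
# assumed NameNode address and log dir; adjust, then run: hdfs dfs -mkdir /spark-logs
spark.eventLog.enabled true
spark.eventLog.dir hdfs://hadoop111:8020/spark-logs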
2.3 workers
cp workers.template workers
vim workers
hadoop111
hadoop112
hadoop113
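The start script logs into every host in workers over SSH, so passwordless SSH from hadoop111 must already be set up. A quick check using the hostnames above:
for h in hadoop111 hadoop112 hadoop113; do ssh $h hostname; done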
2.4 Rename Spark's start-all.sh and stop-all.sh
They are renamed so they do not collide with Hadoop's own start-all.sh/stop-all.sh once $SPARK_HOME/sbin is added to the PATH (section 4):
[root@hadoop111 sbin]# mv start-all.sh start-spark-all.sh
[root@hadoop111 sbin]# mv stop-all.sh stop-spark-all.sh
3 Distribute to the other nodes (xsync here is a user-provided sync script; an scp-based alternative follows in 3.1)
/home/lizi/bin/xsync /opt/module/spark-3.1.3
3.1 Script 2: an scp-based alternative
#!/bin/bash
# Push a file or directory to every other cluster node listed in /etc/hosts.
# Usage: <script> <source> <destination-dir>
MY_HOSTNAME=$(hostname)
# Select the cluster hosts (hadoop111..hadoop113 here) and skip the local machine
HOSTS=$(grep hadoop /etc/hosts | awk '{print $2}' | grep -v "$MY_HOSTNAME")
for HOST in $HOSTS
do
echo "Sending ${1} to ${HOST}:${2}..."
scp -rq "$1" "$HOST:$2"
echo "done.."
done
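Saved for example as /home/lizi/bin/xsync2.sh (name and location are arbitrary) and made executable, the script takes a source and a destination directory:
chmod +x /home/lizi/bin/xsync2.sh
/home/lizi/bin/xsync2.sh /opt/module/spark-3.1.3 /opt/module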
4 Configure environment variables
echo 'export SPARK_HOME=/opt/module/spark-3.1.3' >> /etc/profile.d/my_env.sh
echo 'export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH' >> /etc/profile.d/my_env.sh
source /etc/profile
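So far the environment file exists only on hadoop111; it has to reach the other nodes too (hostnames as above), after which each node should re-source its profile or re-login:
for h in hadoop112 hadoop113; do scp /etc/profile.d/my_env.sh $h:/etc/profile.d/; done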
5 Start the cluster
On the master node (hadoop111):
start-spark-all.sh
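If everything came up, jps shows the daemons on each node, and the web UI from SPARK_MASTER_WEBUI_PORT answers at http://hadoop111:8080:
jps    # hadoop111: Master and Worker; hadoop112/113: one Worker each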
6 Spark-shell
spark-shell
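Because spark.master is set in spark-defaults.conf, spark-shell attaches to the standalone master automatically. For a non-interactive smoke test, the SparkPi example that ships in the Spark distribution works well (the jar filename below matches the stock 3.1.3 build; verify it in your examples/jars directory):
spark-submit --class org.apache.spark.examples.SparkPi $SPARK_HOME/examples/jars/spark-examples_2.12-3.1.3.jar 10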