一、单机安装
# Extract the Spark and Scala tarballs into /opt/soft/
tar -zxf /opt/install/spark-3.1.2-bin-hadoop3.2.tgz -C /opt/soft/
tar -zxf /opt/install/scala-2.12.10.tgz -C /opt/soft/
# Rename to short, version-tagged directory names
mv /opt/soft/spark-3.1.2-bin-hadoop3.2/ /opt/soft/spark312
mv /opt/soft/scala-2.12.10/ /opt/soft/scala212
# Set environment variables (vim is interactive: append the export lines below to /etc/profile)
vim /etc/profile
# SCALA_HOME
export SCALA_HOME=/opt/soft/scala212
export PATH=$PATH:$SCALA_HOME/bin
# SPARK_HOME
export SPARK_HOME=/opt/soft/spark312
export PATH=$PATH:$SPARK_HOME/bin
# Reload the environment variables into the current shell
source /etc/profile
# Launch the interactive Spark shell to verify the installation
spark-shell
########## The settings below are optional (adjust them for your own server) ##########
# Restore spark-env.sh from the shipped template
cp /opt/soft/spark312/conf/spark-env.sh.template /opt/soft/spark312/conf/spark-env.sh
# Edit the env file (append the export lines below)
vim /opt/soft/spark312/conf/spark-env.sh
# Scala installation directory
export SCALA_HOME=/opt/soft/scala212
# JDK installation directory
export JAVA_HOME=/opt/soft/jdk180
# Spark installation directory
export SPARK_HOME=/opt/soft/spark312
# Hadoop installation path
export HADOOP_INSTALL=/opt/soft/hadoop313
# Hadoop configuration directory
export HADOOP_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
# Hostname (or IP) the Spark master binds to.
# FIX: SPARK_MASTER_IP was deprecated in Spark 2.0; Spark 3.x reads SPARK_MASTER_HOST.
export SPARK_MASTER_HOST=xsqone144
# Memory for the Spark driver
export SPARK_DRIVER_MEMORY=2G
# Local scratch directories for shuffle/spill data.
# NOTE(review): pointing this at the install directory works, but mixes scratch
# data with the installation; a dedicated directory (e.g. /data/spark-local) is safer.
export SPARK_LOCAL_DIRS=/opt/soft/spark312
# Memory per executor
export SPARK_EXECUTOR_MEMORY=2G
二、搭建集群
多台机器配置一致
先搭建好 ZooKeeper 集群(配置步骤参见目录四)
# Extract the Spark and Scala tarballs into /opt/soft/ (repeat on every node)
tar -zxf /opt/install/spark-3.1.2-bin-hadoop3.2.tgz -C /opt/soft/
tar -zxf /opt/install/scala-2.12.10.tgz -C /opt/soft/
# Rename to short, version-tagged directory names
mv /opt/soft/spark-3.1.2-bin-hadoop3.2/ /opt/soft/spark312
mv /opt/soft/scala-2.12.10/ /opt/soft/scala212
# Set environment variables (vim is interactive: append the export lines below to /etc/profile)
vim /etc/profile
# SCALA_HOME
export SCALA_HOME=/opt/soft/scala212
export PATH=$PATH:$SCALA_HOME/bin
# SPARK_HOME
export SPARK_HOME=/opt/soft/spark312
export PATH=$PATH:$SPARK_HOME/bin
# Reload the environment variables into the current shell
source /etc/profile
# Restore the cluster workers file from the shipped template
cp /opt/soft/spark312/conf/workers.template /opt/soft/spark312/conf/workers
# List the worker hostnames, one per line (adjust to your own cluster)
vim /opt/soft/spark312/conf/workers
xsqone31
xsqone32
xsqone33
# Restore spark-env.sh from the shipped template
cp /opt/soft/spark312/conf/spark-env.sh.template /opt/soft/spark312/conf/spark-env.sh
# Edit the env file (append the settings below)
vim /opt/soft/spark312/conf/spark-env.sh
# The master web UI defaults to port 8080, which may clash with other services
# (e.g. ZooKeeper); move it to 8989 (any free port works) — remember the new
# port when opening the monitoring UI.
SPARK_MASTER_WEBUI_PORT=8989
# Enable ZooKeeper-based master recovery (high availability).
# FIX: the ZooKeeper connection string must include the client port
# (2181 by default) for each host — bare hostnames are not a valid connect string.
export SPARK_DAEMON_JAVA_OPTS="
-Dspark.deploy.recoveryMode=ZOOKEEPER
-Dspark.deploy.zookeeper.url=xsqone31:2181,xsqone32:2181,xsqone33:2181
-Dspark.deploy.zookeeper.dir=/spark"
# Start the Spark cluster (the master on this node plus every worker listed in conf/workers)
/opt/soft/spark312/sbin/start-all.sh
# Start a standby Master: run this command ON THE SECOND server to enable HA failover
/opt/soft/spark312/sbin/start-master.sh
此处可以查看xsqone31的Master的状态为启用状态
xsqone32的Master的状态为备用状态
此处将xsqone31的Master进程kill掉,查看xsqone32会不会变成ALIVE
此时 xsqone32 的 Master 状态变为 ALIVE,说明集群高可用(HA)搭建成功。