Installation environment
This builds on the Hadoop cluster set up in the previous post:
https://blog.csdn.net/rachelfffy/article/details/141157763?spm=1001.2014.3001.5502
Installation package
Spark distribution prebuilt for Hadoop 3.2: spark-3.2.2-bin-hadoop3.2.tgz
Upload the package, extract it, and grant ownership to the hadoop user
This must be done on all three virtual machines.
root@hadoop002:~# tar -xvf spark-3.2.2-bin-hadoop3.2.tgz -C /opt
root@hadoop002:~# chown -R hadoop:hadoop /opt/spark-3.2.2-bin-hadoop3.2/
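Optionally, verify that the ownership change took effect; the owner and group columns should both read hadoop:
root@hadoop002:~# ls -ld /opt/spark-3.2.2-bin-hadoop3.2/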
Add environment variables
root@hadoop002:~# vim /etc/profile
Append the following:
export SPARK_HOME=/opt/spark-3.2.2-bin-hadoop3.2
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
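/etc/profile is only read by new login shells, so apply it in the current session and confirm the variable is picked up, for example:
root@hadoop002:~# source /etc/profile
root@hadoop002:~# echo $SPARK_HOME
/opt/spark-3.2.2-bin-hadoop3.2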
Do the following configuration on the master node (hadoop001)
Switch to the hadoop user:
su - hadoop
Go to Spark's conf directory and create the config file from its template:
hadoop@hadoop001:~$ cd /opt/spark-3.2.2-bin-hadoop3.2/conf/
hadoop@hadoop001:/opt/spark-3.2.2-bin-hadoop3.2/conf$ cp spark-env.sh.template spark-env.sh
hadoop@hadoop001:/opt/spark-3.2.2-bin-hadoop3.2/conf$ vim spark-env.sh
Append the following:
export JAVA_HOME=$JAVA_HOME
export HADOOP_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_MASTER_HOST=hadoop001
export SPARK_LOCAL_DIRS=$SPARK_HOME
export SPARK_DRIVER_MEMORY=4g # driver memory
export SPARK_WORKER_CORES=2 # CPU cores per worker
export SPARK_EXECUTOR_MEMORY=2g
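Note that export JAVA_HOME=$JAVA_HOME and export HADOOP_HOME=$HADOOP_HOME only work if those variables are already exported in the environment that launches the daemons. If a worker later fails with a "JAVA_HOME is not set" error, hard-code the paths instead; a sketch, with the JDK path taken from the spark-config.sh step below and a placeholder Hadoop directory you should replace with the one from the Hadoop post:
export JAVA_HOME=/usr/local/jdk1.8.0_192
export HADOOP_HOME=/opt/hadoop    # hypothetical path; use your actual Hadoop install directory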
Edit the workers file
hadoop@hadoop001:/opt/spark-3.2.2-bin-hadoop3.2/conf$ cp workers.template workers
hadoop@hadoop001:/opt/spark-3.2.2-bin-hadoop3.2/conf$ vim workers
Replace the default localhost entry with the three hostnames:
hadoop001
hadoop002
hadoop003
Go to Spark's sbin directory and edit spark-config.sh
hadoop@hadoop001:/opt/spark-3.2.2-bin-hadoop3.2/conf$ cd ../sbin/
hadoop@hadoop001:/opt/spark-3.2.2-bin-hadoop3.2/sbin$ vim spark-config.sh
Add the JAVA_HOME path. The sbin scripts start the remote daemons over SSH in a non-interactive shell that does not source /etc/profile, so JAVA_HOME must be set here explicitly:
export JAVA_HOME=/usr/local/jdk1.8.0_192
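Each worker sources its own copy of this script, so if a worker node complains that JAVA_HOME is not set, copy the edited file to the other two nodes as well, for example:
hadoop@hadoop001:/opt/spark-3.2.2-bin-hadoop3.2/sbin$ scp spark-config.sh hadoop002:/opt/spark-3.2.2-bin-hadoop3.2/sbin/
hadoop@hadoop001:/opt/spark-3.2.2-bin-hadoop3.2/sbin$ scp spark-config.sh hadoop003:/opt/spark-3.2.2-bin-hadoop3.2/sbin/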
Start the Spark services (note the ./ prefix: Hadoop ships a start-all.sh of its own, so run Spark's copy from its sbin directory)
hadoop@hadoop001:/opt/spark-3.2.2-bin-hadoop3.2/sbin$ ./start-all.sh
starting org.apache.spark.deploy.master.Master, logging to /opt/spark-3.2.2-bin-hadoop3.2/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-hadoop001.out
hadoop001: starting org.apache.spark.deploy.worker.Worker, logging to /opt/spark-3.2.2-bin-hadoop3.2/logs/spark-hadoop-org.apache.spark.deploy.worker.Worker-1-hadoop001.out
hadoop002: starting org.apache.spark.deploy.worker.Worker, logging to /opt/spark-3.2.2-bin-hadoop3.2/logs/spark-hadoop-org.apache.spark.deploy.worker.Worker-1-hadoop002.out
hadoop003: starting org.apache.spark.deploy.worker.Worker, logging to /opt/spark-3.2.2-bin-hadoop3.2/logs/spark-hadoop-org.apache.spark.deploy.worker.Worker-1-hadoop003.out
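The standalone master also serves a web UI, by default on port 8080, which should list all three workers once they register:
http://hadoop001:8080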
Check that the expected processes are running on each node
hadoop001
hadoop@hadoop001:/opt/spark-3.2.2-bin-hadoop3.2/sbin$ jps
13200 NodeManager
47827 Master
48007 Jps
12777 NameNode
12906 DataNode
47947 Worker
hadoop002
hadoop@hadoop002:~$ jps
16913 Worker
10773 NodeManager
10493 DataNode
17022 Jps
10639 ResourceManager
hadoop003
hadoop@hadoop003:~$ jps
22818 Worker
22962 Jps
11157 NodeManager
10922 DataNode
11052 SecondaryNameNode
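As a final smoke test, one option is to submit the bundled SparkPi example to the standalone master; the examples jar name below matches the 3.2.2 prebuilt package, so adjust it if your copy differs:
hadoop@hadoop001:~$ spark-submit --master spark://hadoop001:7077 \
  --class org.apache.spark.examples.SparkPi \
  $SPARK_HOME/examples/jars/spark-examples_2.12-3.2.2.jar 100
A line like "Pi is roughly 3.14..." in the driver output confirms the cluster is accepting and executing jobs.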