1. 配置 spark-env.sh
# Work in the conf directory of the actual Spark install — this matches the
# path used in every later step (/usr/local/spark/conf only works if such a
# symlink exists, which this guide never creates).
cd /usr/local/spark-2.4.5-bin-hadoop2.7/conf
# The template shipped with Spark lives in conf/ and is named
# spark-env.sh.template, not /spark-env-template.sh.
cp spark-env.sh.template spark-env.sh
vi spark-env.sh
# Add the following to spark-env.sh:
export JAVA_HOME=/usr/lib/jvm/java
# NOTE(review): the doubled "hadoop-2.8.5/hadoop-2.8.5" looks suspicious —
# confirm the tarball really extracted into this nested layout before relying on it.
export HADOOP_CONF_DIR=/usr/local/hadoop-2.8.5/hadoop-2.8.5/etc/hadoop
export SPARK_MASTER_HOST=menber3
export SPARK_MASTER_PORT=7077
export SPARK_WORKER_CORES=1
export SPARK_WORKER_MEMORY=1g
# Use Spark's default 8080 here: 8088 is the YARN ResourceManager web UI's
# default port, and step 8 runs jobs on YARN on this same cluster — keeping
# 8088 would make the two web UIs collide on the master host.
export SPARK_MASTER_WEBUI_PORT=8080
2. 配置 slaves
# Create the worker ("slaves") list from the template that ships with Spark.
cd /usr/local/spark-2.4.5-bin-hadoop2.7/conf
cp slaves.template slaves
vi slaves
# Add the following worker hostnames, one per line (a Worker daemon will be
# started on each of these hosts):
menber3
menber2
menber1
3. 重命名 start-all.sh stop-all.sh
start-all.sh、stop-all.sh 在 hadoop 下原来也有,为了避免冲突,需要重命名 spark 下的同名脚本
cd /usr/local/spark-2.4.5-bin-hadoop2.7/sbin
# Hadoop ships its own start-all.sh/stop-all.sh; rename Spark's copies so the
# two sets of scripts don't shadow each other once both are on PATH.
mv start-all.sh start-spark-all.sh
mv stop-all.sh stop-spark-all.sh
4. 复制spark 到另外两台机器
# scp resolves "./spark-2.4.5-bin-hadoop2.7" against the current directory,
# but the previous step left us in .../spark-2.4.5-bin-hadoop2.7/sbin —
# change to /usr/local first so the relative path exists.
cd /usr/local
scp -r ./spark-2.4.5-bin-hadoop2.7 root@menber2:/usr/local
scp -r ./spark-2.4.5-bin-hadoop2.7 root@menber1:/usr/local
5.修改环境变量 (3台机器都要做修改)
# Append the following to /etc/profile (vi /etc/profile) on ALL three nodes —
# the "source /etc/profile" below only makes sense if the exports were
# actually written there.
export SPARK_HOME=/usr/local/spark-2.4.5-bin-hadoop2.7
# Include bin/ as well so spark-submit / spark-shell are on PATH everywhere,
# in addition to the sbin/ cluster-management scripts.
export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH
# Reload the profile so the variables take effect in the current shell.
source /etc/profile
6.启动 spark/停止spark
# Start / stop the whole Spark standalone cluster (these are the scripts
# renamed in step 3; they launch the Master here and Workers on every host
# listed in conf/slaves).
start-spark-all.sh
stop-spark-all.sh
7. 通过浏览器访问 Spark Web UI 查看集群状态(http://menber3:端口,端口为 spark-env.sh 中 SPARK_MASTER_WEBUI_PORT 配置的值)
8.执行example 程序,并在hadoop YARN 上查看
# From Spark's bin directory: submit the bundled SparkPi example to YARN in
# client mode — 1 GB per executor, 10 executors, 100 partitions (the trailing
# argument). Progress can then be followed in the YARN ResourceManager UI.
./spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode client --executor-memory 1G --num-executors 10 /usr/local/spark-2.4.5-bin-hadoop2.7/examples/jars/spark-examples_2.11-2.4.5.jar 100