Download URL
https://archive.apache.org/dist/spark/spark-2.4.4/
Link: https://pan.baidu.com/s/1BC8HRX5i-4smWKKhEj0W4g
Extraction code: ap5h
1. Extract the installation package
[root@bigdata101 software]# tar -zxvf spark-2.4.4-bin-hadoop2.7.tgz -C /opt/module/
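To confirm the extraction succeeded, check that the new directory exists (the name below comes straight from the tarball):
[root@bigdata101 software]# ls /opt/module/ | grep spark
spark-2.4.4-bin-hadoop2.7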
2. Rename the extracted Spark directory
[root@bigdata101 module]# mv spark-2.4.4-bin-hadoop2.7 spark-2.4.4-hadoop2.7
3. Create slaves and spark-env.sh from the templates in conf/
[root@bigdata101 module]# cd spark-2.4.4-hadoop2.7/conf/
[root@bigdata101 conf]# cp slaves.template slaves
[root@bigdata101 conf]# cp spark-env.sh.template spark-env.sh
4. Configure spark-env.sh
Append the following to conf/spark-env.sh:
export JAVA_HOME=/opt/module/jdk1.8.0_221
export HADOOP_HOME=/opt/module/hadoop-2.9.2
export SPARK_MASTER_IP=192.168.1.101
export SPARK_WORKER_MEMORY=1g
export HADOOP_CONF_DIR=/opt/module/hadoop-2.9.2/etc/hadoop
export SPARK_DIST_CLASSPATH=$(/opt/module/hadoop-2.9.2/bin/hadoop classpath)
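SPARK_DIST_CLASSPATH uses command substitution, so it only works if the hadoop binary resolves and prints a classpath. A quick sanity check before starting the cluster (output abbreviated; the entries reflect your Hadoop layout):
[root@bigdata101 conf]# /opt/module/hadoop-2.9.2/bin/hadoop classpath
/opt/module/hadoop-2.9.2/etc/hadoop:/opt/module/hadoop-2.9.2/share/hadoop/common/lib/*:...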
5. Edit slaves to list the worker hosts
bigdata102
bigdata103
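start-all.sh launches a Worker on each host listed in slaves over SSH, so bigdata101 needs passwordless SSH to both workers (normally already in place for an HDFS cluster like this one). A quick check:
[root@bigdata101 conf]# ssh bigdata102 hostname
bigdata102
[root@bigdata101 conf]# ssh bigdata103 hostname
bigdata103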
6. Configure environment variables (on all nodes)
vi ~/.bash_profile
JAVA_HOME=/opt/module/jdk1.8.0_221
HADOOP_HOME=/opt/module/hadoop-2.9.2
SPARK_HOME=/opt/module/spark-2.4.4-hadoop2.7
HIVE_HOME=/opt/module/apache-hive-2.3.6-bin
HBASE_HOME=/opt/module/hbase-0.98.17-hadoop2
ZK_HOME=/opt/module/zookeeper-3.4.7
CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
PATH=$PATH:$HOME/bin:$ZK_HOME/bin:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HBASE_HOME/bin:$SPARK_HOME/bin
export PATH JAVA_HOME HIVE_HOME CLASSPATH HADOOP_HOME ZK_HOME SPARK_HOME HBASE_HOME
Variable descriptions
- JAVA_HOME: Java installation directory
- SCALA_HOME: Scala installation directory
- HADOOP_HOME: Hadoop installation directory
- HADOOP_CONF_DIR: directory holding the Hadoop cluster's configuration files
- SPARK_MASTER_IP: IP address of the Spark cluster's Master node
- SPARK_WORKER_MEMORY: maximum memory each worker node can allocate to executors
- SPARK_WORKER_CORES: number of CPU cores used by each worker node
- SPARK_WORKER_INSTANCES: number of worker instances started on each machine
7. Apply the environment variables (on all three nodes)
source ~/.bash_profile
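After sourcing, the Spark binaries should resolve from any directory; for example:
[root@bigdata101 ~]# echo $SPARK_HOME
/opt/module/spark-2.4.4-hadoop2.7
[root@bigdata101 ~]# which spark-submit
/opt/module/spark-2.4.4-hadoop2.7/bin/spark-submit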
8. Copy the Spark directory to the other two nodes with scp
[root@bigdata101 module]# scp -r spark-2.4.4-hadoop2.7 root@bigdata102:/opt/module/
[root@bigdata101 module]# scp -r spark-2.4.4-hadoop2.7 root@bigdata103:/opt/module/
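Since step 6 configures the environment variables on all nodes, ~/.bash_profile can be copied the same way (assuming the installation paths are identical on every node), then sourced on each host as in step 7:
[root@bigdata101 module]# scp ~/.bash_profile root@bigdata102:~/
[root@bigdata101 module]# scp ~/.bash_profile root@bigdata103:~/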
9. Go to Spark's sbin directory /opt/module/spark-2.4.4-hadoop2.7/sbin and start the cluster
[root@bigdata101 module]# cd /opt/module/spark-2.4.4-hadoop2.7/sbin
[root@bigdata101 sbin]# ./start-all.sh
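If anything fails to come up, the Master and Worker logs land in $SPARK_HOME/logs; the file names follow Spark's spark-<user>-<class>-<instance>-<host>.out pattern, so a wildcard is enough:
[root@bigdata101 sbin]# tail -n 20 /opt/module/spark-2.4.4-hadoop2.7/logs/spark-*Master*.out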
10. After startup, bigdata101 now runs a Master process
[root@bigdata101 sbin]# jps
7200 JournalNode
7696 NodeManager
9490 HMaster
8339 HRegionServer
7588 ResourceManager
7062 QuorumPeerMain
10343 Master
7482 DFSZKFailoverController
10412 Jps
7326 NameNode
11. After startup, bigdata102 now runs a Worker process
[root@bigdata102 conf]# jps
7408 NameNode
7504 DataNode
9488 Worker
7601 DFSZKFailoverController
7106 QuorumPeerMain
7733 ResourceManager
7303 JournalNode
8152 HRegionServer
9581 Jps
7855 NodeManager
12. After startup, bigdata103 now runs a Worker process
[root@bigdata103 conf]# jps
8112 Jps
7553 HRegionServer
8034 Worker
7348 DataNode
7111 QuorumPeerMain
7277 JournalNode
13. Open the Spark Web UI at http://192.168.1.101:8080
Launch the Spark Python client:
[root@bigdata101 bin]# ./pyspark
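Note that ./pyspark with no arguments runs in local mode. To attach the shell to the standalone cluster started above, pass the master URL (port 7077 is the standalone default; sc.master confirms the connection):
[root@bigdata101 bin]# ./pyspark --master spark://192.168.1.101:7077
>>> print(sc.master)
spark://192.168.1.101:7077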
Command summary:
./start-all.sh # start Spark, run from spark-2.4.4-hadoop2.7/sbin
./pyspark # start the Spark Python client, run from spark-2.4.4-hadoop2.7/bin
hadoop fs -mkdir /spark # create the directory on HDFS
hadoop fs -put stu.txt /spark # upload stu.txt to the /spark directory
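As a minimal end-to-end check, the uploaded file can be read back from the pyspark shell; with HADOOP_CONF_DIR set in spark-env.sh, a bare path like /spark/stu.txt resolves against HDFS (stu.txt here is just the sample file from the upload command above):
>>> lines = sc.textFile("/spark/stu.txt")  # read the file uploaded to HDFS
>>> lines.count()  # returns the number of lines in stu.txt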