#1. Extract the archive and rename the directory to:
spark-3.1.2
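A minimal sketch of this step, assuming the tarball is spark-3.1.2-bin-hadoop3.2.tgz and was downloaded to /opt/software (the archive name and download location are assumptions, not given above):
-------------------------------------------
cd /opt/software
# extract the archive
tar -zxvf spark-3.1.2-bin-hadoop3.2.tgz
# rename the extracted directory to match SPARK_HOME below
mv spark-3.1.2-bin-hadoop3.2 spark-3.1.2
--------------------------------------------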
#2. Configure the environment variables
vim /etc/profile.d/my.sh
-------------------------------------------
# spark
export SPARK_HOME=/opt/software/spark-3.1.2
export PATH=$SPARK_HOME/bin:$PATH
--------------------------------------------
Apply the environment variables:
source /etc/profile
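A quick way to verify that the variables took effect:
-------------------------------------------
echo $SPARK_HOME        # should print /opt/software/spark-3.1.2
spark-submit --version  # should report Spark 3.1.2
--------------------------------------------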
#3. In the conf directory, copy spark-env.sh.template and workers.template
cp spark-env.sh.template spark-env.sh
cp workers.template workers    (single-node setup: no cluster host mappings need to be added)
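For a single node the template default should be enough: the workers file only needs to list the local machine, e.g.:
----------------
localhost
-----------------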
#4. Add the spark-env.sh configuration
export SCALA_HOME=/opt/software/scala-2.12.10
export JAVA_HOME=/opt/software/jdk1.8.0_171
export SPARK_HOME=/opt/software/spark-3.1.2
export HADOOP_INSTALL=/opt/software/hadoop-3.1.3
# needed to run Spark on YARN
export HADOOP_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
export SPARK_MASTER_IP=single03    # deprecated name; Spark 3.x prefers SPARK_MASTER_HOST
export SPARK_DRIVER_MEMORY=2G
# where shuffle writes its temporary files
export SPARK_LOCAL_DIRS=/opt/bigdata/spark245
export SPARK_EXECUTOR_MEMORY=2G
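SPARK_LOCAL_DIRS points at a scratch directory; to be safe, create it up front so Spark can write shuffle files there (path taken from the config above):
-------------------------------------------
mkdir -p /opt/bigdata/spark245
--------------------------------------------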
#5. Start Spark
spark-shell
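Started this way, the shell uses the default master local[*]; since HADOOP_CONF_DIR is set, it could instead be launched on YARN with spark-shell --master yarn (assuming HDFS and YARN are already running). A quick sanity check inside the shell (the res numbering may differ):
----------------
scala> spark.version
res0: String = 3.1.2
scala> sc.master
res1: String = local[*]
-----------------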
#6. WordCount
Create a txt file (the path used below is /root/wordcount/hello.txt): vim hello.txt
----------------
hello world
hello spark
hello scala
-----------------
val rdd1=sc.textFile("file:///root/wordcount/hello.txt")
# WordCount implemented in Scala
rdd1.flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_).collect
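Expected result, given the three lines in hello.txt (the order of the pairs may vary between runs, and the res number depends on your session):
----------------
res2: Array[(String, Int)] = Array((scala,1), (hello,3), (world,1), (spark,1))
-----------------
For a stable ordering, a sort by descending count can be appended before collect, e.g. .sortBy(-_._2).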
End