Spark2
下载spark2纯净版
wget https://archive.apache.org/dist/spark/spark-2.4.7/spark-2.4.7-bin-without-hadoop.tgz
安装配置纯净版
默认配置不赘述。
-
删除多余的cmd文件
cd 纯净版解压路径 && rm -rf */*.cmd
-
配置spark-env.sh
export HADOOP_HOME=/opt/module/hadoop3
export SPARK_DIST_CLASSPATH=$(${HADOOP_HOME}/bin/hdfs classpath)
Hive3
hive-env.sh
export HADOOP_HOME=/opt/module/hadoop3
export SPARK_HOME=/opt/module/spark2-with
hive-site.xml
<configuration>
<!-- Spark2 依赖库位置,在YARN 上运行的任务需要从HDFS 中查找依赖jar 文件 -->
<property>
<name>spark.yarn.jars</name>
<value>${fs.defaultFS}/spark2-jars/*</value>
</property>
<!-- Hive3 执行引擎设为spark -->
<property>
<name>hive.execution.engine</name>
<value>spark</value>
</property>
<!-- Hive3 和Spark2 连接超时时间 -->
<property>
<name>hive.spark.client.connect.timeout</name>
<value>30000ms</value>
</property>
</configuration>
spark-defaults.conf
spark.master yarn
# 启用事件日志(event log),供Spark History Server 查看已完成的任务
spark.eventLog.enabled true
# 保存事件日志的HDFS 路径(该目录需提前创建,否则任务启动会报错)
spark.eventLog.dir hdfs://sc1:9820/spark2-history
spark.executor.memory 1g
spark.driver.memory 1g
# 下面是spark 动态资源调控优化项
# 启用外部shuffle 服务(External Shuffle Service),动态资源调控依赖该服务
spark.shuffle.service.enabled true
# 外部shuffle 服务端口
spark.shuffle.service.port 7337
# 允许动态资源调控
spark.dynamicAllocation.enabled true
# 最大Executor数
spark.dynamicAllocation.maxExecutors 128
spark动态资源调控需要在yarn-site.xml中添加如下配置
<property>
<name>yarn.nodemanager.aux-services</name>
<value>spark_shuffle,mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
<value>org.apache.spark.network.yarn.YarnShuffleService</value>
</property>
将spark纯净版解压目录下yarn目录中的spark-2.4.7-yarn-shuffle.jar拷贝到hadoop目录下的share/hadoop/yarn/lib/中,并分发到所有NodeManager节点,然后重启NodeManager使配置生效。
spark2依赖库处理
cd /opt/module/spark2-with/jars
mv orc-core-1.5.5-nohive.jar orc-core-1.5.5-nohive.jar.bak
# 上传jar包到hdfs
hdfs dfs -rm -r -f /spark2-jars
hdfs dfs -mkdir /spark2-jars
hdfs dfs -put /opt/module/spark2-with/jars/* /spark2-jars
hdfs dfs -ls /spark2-jars
# 拷贝jar包到hive
cp scala-compiler-2.11.12.jar scala-library-2.11.12.jar scala-reflect-2.11.12.jar spark-core_2.11-2.4.7.jar spark-network-common_2.11-2.4.7.jar spark-unsafe_2.11-2.4.7.jar spark-yarn_2.11-2.4.7.jar /opt/module/hive3/lib/
# 将orc上传至hdfs
hdfs dfs -put /opt/module/hive3/lib/orc-core-1.5.6.jar /spark2-jars