机器环境:scala 2.11.12、hadoop 3.0.0、spark 2.3.1
一、安装scala 2.11.12
tar -zxf scala-2.11.12.tgz -C /opt/modules/
sudo vi /etc/profile
# SCALA_HOME
export SCALA_HOME=/opt/modules/scala-2.11.12
export PATH=$SCALA_HOME/bin:$PATH
二、安装spark 2.3.1
cp conf/spark-env.sh.template conf/spark-env.sh
1. 修改spark-env.sh
vi conf/spark-env.sh
HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
SPARK_LOCAL_IP=lee
JAVA_HOME=/opt/modules/jdk1.8.0_172
SCALA_HOME=/opt/modules/scala-2.11.12
export SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://lee:8020/spark/jobs/history"
2. 修改spark-defaults.conf（由conf/spark-defaults.conf.template复制而来）
spark.eventLog.enabled true
spark.eventLog.dir hdfs://lee:8020/spark/jobs/history
spark.yarn.historyServer.address lee:18080
3. 修改hadoop yarn-site.xml
<property>
<name>yarn.log.server.url</name>
<value>http://lee:19888/jobhistory/job/</value>
</property>
4. 重启
hadoop:sbin/start-all.sh
sbin/mr-jobhistory-daemon.sh start historyserver
spark:sbin/start-all.sh
sbin/start-history-server.sh
5. 验证
6. Spark SQL集成
ln -s /opt/modules/hive-3.0.0/conf/hive-site.xml conf/
ln -s /opt/modules/hadoop-3.0.0/etc/hadoop/hdfs-site.xml conf/
编辑$HIVE_HOME/conf/hive-site.xml,增加内容如下:
<property>
<name>hive.metastore.uris</name>
<value>thrift://lee:9083</value>
<description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
</property>
启动hive metastore
bin/hive --service metastore 1>/dev/null 2>&1 &
查看metastore
jobs
将mysql jdbc驱动包拷贝给spark
cp /opt/modules/hive-3.0.0/lib/mysql-connector-java-8.0.11.jar jars/
启动spark-sql
bin/spark-sql
验证:http://lee:4040
spark-sql修改控制台日志级别
vi conf/log4j.properties
# Set everything to be logged to the console
log4j.rootCategory=WARN, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=WARN
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=WARN
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR