spark集群安装部署(spark on yarn)
1.前提
已经部署了Hadoop 2.2集群
2.下载并安装scala
2.1下载scala
2.2安装scala
# Install Scala under /usr/local/myspark/scala and register it in /etc/profile.
mkdir -p /usr/local/myspark/scala
cd /opt
tar -zxvf scala-2.11.2.tgz
cp -r scala-2.11.2 /usr/local/myspark/scala/
# Use single quotes so $SCALA_HOME and $PATH are written literally and
# expanded when the profile is sourced — double quotes would freeze the
# current (empty) SCALA_HOME and the current PATH into /etc/profile.
echo 'export SCALA_HOME=/usr/local/myspark/scala/scala-2.11.2' >> /etc/profile
echo 'export PATH=$SCALA_HOME/bin:$PATH' >> /etc/profile
source /etc/profile
3.下载spark
4.安装spark
# Unpack the Spark distribution and place it under the install root.
spark_root=/usr/local/myspark/spark
mkdir -p "$spark_root"
cd /opt
tar -zxvf spark-1.0.2-bin-hadoop2.tgz
cp -r spark-1.0.2-bin-hadoop2 "$spark_root"/
5.配置
5.1配置spark-env.sh
spark-env.sh
# Create spark-env.sh from the shipped template and set the runtime
# environment Spark needs on every node.
cd /usr/local/myspark/spark/spark-1.0.2-bin-hadoop2/conf
cp spark-env.sh.template spark-env.sh
vi spark-env.sh
# Contents to add to spark-env.sh:
export JAVA_HOME=/usr/local/java/jdk1.7.0_25
export SCALA_HOME=/usr/local/myspark/scala/scala-2.11.2
# HADOOP_CONF_DIR lets Spark locate the YARN/HDFS client configuration.
export HADOOP_CONF_DIR=/usr/local/hadoop/hadoop-2.2.0/etc/hadoop
export SPARK_HOME=/usr/local/myspark/spark/spark-1.0.2-bin-hadoop2
5.2修改spark-defaults.conf
# Create spark-defaults.conf from the template and set cluster-wide
# default properties for spark-submit.
cd /usr/local/myspark/spark/spark-1.0.2-bin-hadoop2/conf
cp spark-defaults.conf.template spark-defaults.conf
vi spark-defaults.conf
# Contents to add to spark-defaults.conf:
spark.master yarn-cluster
# Event logging records finished applications; the HDFS path below must
# already exist (hdfs dfs -mkdir /sparkeventlog).
spark.eventLog.enabled true
spark.eventLog.dir hdfs://master:9000/sparkeventlog
spark.serializer org.apache.spark.serializer.KryoSerializer
5.3修改slaves
# List the worker hosts, one per line, in conf/slaves (used by
# sbin/start-all.sh to start a Worker on each machine via ssh).
cd /usr/local/myspark/spark/spark-1.0.2-bin-hadoop2/conf
vi slaves
# Contents of slaves:
master
master2
slave1
slave2
slave3
5.4修改log4j.properties
# Create log4j.properties from the template; log to both the console and
# a rolling file.
cd /usr/local/myspark/spark/spark-1.0.2-bin-hadoop2/conf
cp log4j.properties.template log4j.properties
vi log4j.properties
# Contents of log4j.properties:
# Set everything to be logged to the console
# spark.log is a log4j property used below as ${spark.log}; it names the
# directory that holds the rolling log file.
spark.log=/var/log
log4j.rootCategory=INFO, console,file
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Rolling file appender: at most 10 backups of 5MB each.
log4j.appender.file=org.apache.log4j.RollingFileAppender
log4j.appender.file.MaxFileSize=5MB
log4j.appender.file.MaxBackupIndex=10
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
log4j.appender.file.File=${spark.log}/spark.log
# Settings to quiet third party logs that are too verbose
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
5.5修改/etc/profile
# Append the Spark environment variables to /etc/profile.
# (No interactive 'vi' needed — the echo appends below do the edit.)
# Single quotes keep $SPARK_HOME and $PATH literal so they are expanded
# each time the profile is sourced; double quotes would bake the current
# shell's values into the file. The first echo was also broken across
# two lines in the original and would not have executed.
echo 'export SPARK_EXAMPLES_JAR=/usr/local/myspark/spark/spark-1.0.2-bin-hadoop2/lib/spark-examples-1.0.2-hadoop2.2.0.jar' >> /etc/profile
echo 'export SPARK_HOME=/usr/local/myspark/spark/spark-1.0.2-bin-hadoop2' >> /etc/profile
echo 'export PATH=$SPARK_HOME/bin:$PATH' >> /etc/profile
source /etc/profile
6.拷贝到其他节点
# Run on every OTHER node: copy Scala and Spark from the master
# (10.41.2.82) and register the environment variables.
mkdir -p /usr/local/myspark/scala
mkdir -p /usr/local/myspark/spark
scp -r 10.41.2.82:/usr/local/myspark/scala/scala-2.11.2 /usr/local/myspark/scala/
scp -r 10.41.2.82:/usr/local/myspark/spark/spark-1.0.2-bin-hadoop2 /usr/local/myspark/spark/
# Single quotes keep $SCALA_HOME/$SPARK_HOME/$PATH literal in the
# profile; double quotes would freeze the current shell's (empty)
# values into /etc/profile.
echo 'export SCALA_HOME=/usr/local/myspark/scala/scala-2.11.2' >> /etc/profile
echo 'export PATH=$SCALA_HOME/bin:$PATH' >> /etc/profile
echo 'export SPARK_HOME=/usr/local/myspark/spark/spark-1.0.2-bin-hadoop2' >> /etc/profile
echo 'export PATH=$SPARK_HOME/bin:$PATH' >> /etc/profile
source /etc/profile
7.启动停止
7.1启动
在master(10.41.2.82)上运行
# Start the Spark master on this host and a worker on every host listed
# in conf/slaves (via ssh).
/usr/local/myspark/spark/spark-1.0.2-bin-hadoop2/sbin/start-all.sh
7.2停止
# Stop the master and all workers started by start-all.sh.
/usr/local/myspark/spark/spark-1.0.2-bin-hadoop2/sbin/stop-all.sh
8.测试
8.1Web UI
http://10.41.2.82:8080
http://master:8080
8.2运行demo
在10.41.2.82上执行以下命令:
# Submit the SparkPi example to YARN in cluster mode: 3 executors,
# 4g driver / 2g executor memory, 1 core each; final "10" is the number
# of slices passed to SparkPi.
# NOTE(review): --master yarn-cluster runs on YARN, so the standalone
# master/workers started by start-all.sh are not used by this job —
# confirm which deployment mode is intended.
/usr/local/myspark/spark/spark-1.0.2-bin-hadoop2/bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn-cluster --num-executors 3 --driver-memory 4g --executor-memory 2g --executor-cores 1 /usr/local/myspark/spark/spark-1.0.2-bin-hadoop2/lib/spark-examples-1.0.2-hadoop2.2.0.jar 10
查看结果:
访问:
http://master:8088/proxy/application_1409622175934_0001/
点击logs
结果为:
Pi is roughly 3.145044