#!/bin/bash
##################################################
# @date 2018/11/03
# @author sjmz
# @desc Example launcher for a Spark job.
# Usage: submit_spark.sh <dt>
#   $1 - data date, forwarded to the job as dt=<value>
##################################################
# NOTE: the stray "submit_spark.sh" line that preceded the shebang was
# removed — the shebang must be the first line of the file, and bash would
# otherwise try to execute that word as a command.
# Renamed HOME -> PROJECT_HOME: overriding $HOME clobbers the login
# environment for every process this script spawns.
PROJECT_HOME="/home/testid/project1"
SPARK_BIN="/home/testid/spark/bin"
CLASS="com.sjmz.StartSpark"
MASTER="yarn"
JAR_HOME="${PROJECT_HOME}/test.jar"
LOG_HOME="${PROJECT_HOME}/logs"
APP_NAME="test_app"
# Timestamp uses '-' instead of ':' — colons in file names break scp/HDFS
# and confuse many tools. $() replaces the legacy backtick substitution.
LOG_FILE="${LOG_HOME}/${APP_NAME}-${1:-}$(date "+-%H-%M-%S").log"
echo "${LOG_FILE}"
# Adjust JAVA_HOME here if the cluster default is not suitable.
export JAVA_HOME="/usr/java/jdk1.8.0_121"
# Job configuration, passed to the main class as program arguments.
SOLR_ZOOKEEPER="solr.zookeeper.url=192.168.1.1:2181,192.168.1.2:2181,192.168.1.3:2181"
SPARK_APP_NAME="spark.app.name=${APP_NAME}"
DATA_DT="dt=${1:-}"
SOLR_COMMIT_TIMES="solr.commit.times=120000"
SOLR_LIMIT="solr.limit=10000"
COLLECTION_NAME="collection.name=COLL_1"
# Ensure the log directory exists (mkdir -p is a no-op if it already does,
# so the previous "[ ! -d ]" guard was redundant).
mkdir -p "${LOG_HOME}"
# spark-submit option reference:
#--conf PROP=VALUE Arbitrary Spark configuration property.
#--conf "spark.yarn.executor.memoryOverhead=6G" \
#--driver-memory MEM Memory for driver (e.g. 1000M, 2G) (Default: 1024M)
#--executor-memory MEM Memory for executor (e.g. 1000M, 2G) (Default: 1G)
#--executor-cores NUM Number of cores per executor. (Default: 1 in YARN mode,
# or all available cores on the worker in standalone mode)
#--num-executors NUM Number of executors to launch (Default: 2).
# If dynamic allocation is enabled, the initial number of
# executor will be at least NUM.
#
# Fixes vs. the original invocation:
#  - "${SPARK-BIN}" expanded to the literal "BIN" ('-' is the default-value
#    operator, not part of a variable name); the intended command is the
#    spark-submit executable inside ${SPARK_BIN}.
#  - "$ SOLR_LIMIT" had a stray space, passing a literal '$' and the word
#    'SOLR_LIMIT' instead of the variable's value.
#  - dropped the useless 'exec' before a backgrounded command.
#  - quoted every expansion so empty/space-containing values stay one arg.
nohup "${SPARK_BIN}/spark-submit" \
  --class "${CLASS}" \
  --master "${MASTER}" \
  --deploy-mode "cluster" \
  --conf "spark.yarn.appMasterEnv.JAVA_HOME=${JAVA_HOME}" \
  --conf "spark.executorEnv.JAVA_HOME=${JAVA_HOME}" \
  --num-executors 6 \
  --executor-memory 6G \
  "${JAR_HOME}" \
  "${SOLR_ZOOKEEPER}" "${SPARK_APP_NAME}" "${SOLR_COMMIT_TIMES}" "${SOLR_LIMIT}" \
  "${COLLECTION_NAME}" "${DATA_DT}" >> "${LOG_FILE}" 2>&1 &
# Poll the driver log every 10 s until YARN reports shutdown, then derive
# the script's exit code from the job's final status. Intentionally waits
# forever if the marker never appears (same behavior as before).
while true; do
  sleep 10
  # -q: we only need the exit status; without it grep printed every matched
  # line to stdout on each pass. Testing the command directly replaces the
  # fragile '[ $? = 0 ]' pattern.
  if grep -q "Shutdown hook called" "${LOG_FILE}"; then
    if grep -q "final status: SUCCEEDED" "${LOG_FILE}"; then
      echo "success"
      exit 0
    fi
    echo "fail"
    exit 1
  fi
done