core: a Spark standalone cluster plus HDFS (HDFS holds the event logs for the history server)
configuration:
# spark/conf/workers
# spark/conf/spark-env.sh
# spark/conf/spark-defaults.conf
# spark/conf/log4j.properties
# hdfs
# spark/conf/workers
node1.itcast.cn
node2.itcast.cn
node3.itcast.cn
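start-all.sh launches the workers over SSH from the master, so node1 needs passwordless SSH to every host in workers; a quick sanity check (assuming the keys are already distributed):
ssh root@node2.itcast.cn hostname
ssh root@node3.itcast.cn hostname
# each should print the hostname without prompting for a password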
# spark/conf/spark-env.sh
JAVA_HOME=/export/server/jdk1.8.0_241/
HADOOP_CONF_DIR=/export/server/hadoop-3.3.0/etc/hadoop/
YARN_CONF_DIR=/export/server/hadoop-3.3.0/etc/hadoop/
export SPARK_MASTER_HOST=node1
export SPARK_MASTER_PORT=7077
SPARK_MASTER_WEBUI_PORT=8080
SPARK_WORKER_CORES=1
SPARK_WORKER_MEMORY=1g
SPARK_WORKER_PORT=7078
SPARK_WORKER_WEBUI_PORT=8081
SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://node1:8020/sparklog/ -Dspark.history.fs.cleaner.enabled=true"
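the history options above assume the NameNode RPC address is node1:8020; worth confirming against the Hadoop config before going further:
hdfs getconf -confKey fs.defaultFS
# expect hdfs://node1:8020 (or equivalent); otherwise adjust the sparklog URLs to match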
# hdfs: create the event-log directory
hdfs dfs -mkdir -p /sparklog/
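quick check that the directory exists (and that HDFS is up):
hdfs dfs -test -d /sparklog/ && echo "sparklog ok"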
# spark/conf/spark-defaults.conf
spark.eventLog.enabled   true
spark.eventLog.dir       hdfs://node1:8020/sparklog/
spark.eventLog.compress  true
# spark.eventLog.dir must match spark.history.fs.logDirectory in SPARK_HISTORY_OPTS
# spark/conf/log4j.properties
change log4j.rootCategory=INFO to log4j.rootCategory=WARN to quiet the console output
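a fresh Spark unpack ships only log4j.properties.template in conf/; one way to apply the change (a sketch, assuming the stock Spark 3.1.2 template line log4j.rootCategory=INFO, console):
cd /export/server/spark/conf
cp log4j.properties.template log4j.properties
sed -i 's/log4j.rootCategory=INFO, console/log4j.rootCategory=WARN, console/' log4j.properties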
# distribute to the other nodes (run from the parent directory, e.g. /export/server, so $PWD resolves to the same path on the remote side)
scp -r spark-3.1.2-bin-hadoop3.2/ root@node2:$PWD
scp -r spark-3.1.2-bin-hadoop3.2/ root@node3:$PWD
# start and stop
# option 1: all daemons at once
/export/server/spark/sbin/start-all.sh
/export/server/spark/sbin/stop-all.sh
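after start-all.sh, jps should show a Master on node1 and a Worker on each of the three nodes, and the master web UI comes up at http://node1:8080 (per SPARK_MASTER_WEBUI_PORT above):
jps | grep -E 'Master|Worker'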
# option 2: master and workers separately
/export/server/spark/sbin/start-master.sh
/export/server/spark/sbin/stop-master.sh
/export/server/spark/sbin/start-slaves.sh
/export/server/spark/sbin/stop-slaves.sh
# (start-slaves.sh / stop-slaves.sh are deprecated aliases for start-workers.sh / stop-workers.sh in Spark 3.1+)
# start the history server (web UI on port 18080)
/export/server/spark/sbin/start-history-server.sh
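if the event-log settings line up, the history UI is served at http://node1:18080; to confirm the daemon is running:
jps | grep HistoryServer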
# connect (pyspark shell against the cluster)
/export/server/spark/bin/pyspark --master spark://node1:7077
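an end-to-end check: submit the Pi example that ships with Spark against the cluster, then confirm an event log landed in HDFS (example path as in the 3.1.2 binary distribution):
/export/server/spark/bin/spark-submit --master spark://node1:7077 /export/server/spark/examples/src/main/python/pi.py 10
hdfs dfs -ls /sparklog/
# the finished application should also show up at http://node1:18080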