Using a log4j appender to append Spark job logs to a local file or MySQL

Goal

While a Spark job is running, write its log4j output directly to a local file or to MySQL.


Implementation

The job is the Spark example provided on the official site, with a few log lines added:

package org.example;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.List;


public class JavaSparkPi {

    public static void main(String[] args) {

        Logger logger = LoggerFactory.getLogger(JavaSparkPi.class);

        logger.info("log:--- start ----");
        System.out.println("--- start ----");

        SparkSession spark = SparkSession
                .builder()
                .appName("JavaSparkPi:" + System.currentTimeMillis())
                .getOrCreate();

        JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

        logger.info("log--- here1 ----");
        System.out.println("--- here1 ----");

        int slices = (args.length == 1) ? Integer.parseInt(args[0]) : 2;
        int n = 100000 * slices;
        List<Integer> l = new ArrayList<>(n);
        for (int i = 0; i < n; i++) {
            l.add(i);
        }

        JavaRDD<Integer> dataSet = jsc.parallelize(l, slices);

        int count = dataSet.map(integer -> {
            double x = Math.random() * 2 - 1;
            double y = Math.random() * 2 - 1;
            return (x * x + y * y <= 1) ? 1 : 0;
        }).reduce((integer, integer2) -> integer + integer2);

        logger.info("log: Pi is roughly " + 4.0 * count / n);
        System.out.println("Pi is roughly " + 4.0 * count / n);

        spark.stop();
    }
}
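
A side note on the JDBC appender configured further down: its INSERT pattern uses %X{User}, which is read from the logging MDC and will be empty unless the job puts a value there. Below is a minimal, hypothetical sketch of the MDC calls; in the real job they would sit in JavaSparkPi.main() around the logger statements, and this assumes the slf4j binding in use forwards MDC values to log4j (Spark 2.4.x's bundled slf4j-log4j12 does):

package org.example;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;

// Hypothetical standalone sketch of populating the MDC key that the
// %X{User} conversion in the JDBC appender pattern reads.
public class MdcUserExample {

    public static void main(String[] args) {
        Logger logger = LoggerFactory.getLogger(MdcUserExample.class);

        MDC.put("User", System.getProperty("user.name")); // value picked up by %X{User}
        logger.info("log:--- start ----");                // this record now carries the User field
        MDC.remove("User");                               // clean up when done
    }
}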

Dependencies

  <dependencies>
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-api</artifactId>
      <version>2.18.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-core</artifactId>
      <version>2.18.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-slf4j-impl</artifactId>
      <version>2.18.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.12</artifactId>
      <version>2.4.8</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.12</artifactId>
      <version>2.4.8</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-jdbc</artifactId>
      <version>3.1.2</version>
    </dependency>
    <dependency>
      <groupId>org.glassfish</groupId>
      <artifactId>javax.el</artifactId>
      <version>3.0.1-b12</version>
    </dependency>
  </dependencies>
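
Note that the JDBC appender below uses com.mysql.jdbc.Driver, which none of the dependencies above provide. If the MySQL driver is not already on the cluster classpath, a dependency along these lines would also be needed (the version is only an assumption; pick one that matches your MySQL server):

    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <!-- assumed version; match your MySQL server -->
      <version>5.1.49</version>
    </dependency>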

Spark job submit command. The --files option ships log4j.properties into the working directory of each YARN container, which is on the container classpath, so the relative path in -Dlog4j.configuration=log4j.properties can be resolved:

${SPARK_HOME}/bin/spark-submit --class org.example.JavaSparkPi \
  --master yarn \
  --deploy-mode cluster \
  --driver-memory 4g \
  --executor-memory 2g \
  --executor-cores 1 \
  --queue default \
  --files "/home/deployer/diego/log4j/log4j.properties" \
  --driver-java-options "-Dlog4j.debug=true -Dlog4j.configuration=log4j.properties" \
  --conf "spark.executor.extraJavaOptions=-Dlog4j.debug=true -Dlog4j.configuration=log4j.properties " \
  /home/deployer/diego/java-spark-on-yarn-1.0-SNAPSHOT-jar-with-dependencies.jar \
  10

log4j.properties configuration

# myfile records everything logged while Spark runs, including Spark's own framework output, to the local file /tmp/application.log.
log4j.rootLogger = debug,myfile
log4j.appender.myfile = org.apache.log4j.DailyRollingFileAppender
log4j.appender.myfile.File = /tmp/application.log
log4j.appender.myfile.Append = true
log4j.appender.myfile.Threshold = INFO 
log4j.appender.myfile.layout = org.apache.log4j.PatternLayout
log4j.appender.myfile.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss}  [ %t:%r ] - [ %p ]  %m%n

# myappender records only the log output of the org.example.JavaSparkPi class, which is what we really want, and writes it to the local file /tmp/application2.log.
log4j.logger.org.example.JavaSparkPi=DEBUG, myappender
log4j.additivity.org.example.JavaSparkPi=false
log4j.appender.myappender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.myappender.datePattern='-'dd'.log'
log4j.appender.myappender.File=/tmp/application2.log
log4j.appender.myappender.layout = org.apache.log4j.PatternLayout
log4j.appender.myappender.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss}  [ %t:%r ] - [ %p ]  %m%n

# DB appends the org.example.JavaSparkPi logs to MySQL; the LOGS table must be created in advance.
# Note: redefining log4j.logger.org.example.JavaSparkPi below replaces the earlier definition (the last one in the file wins),
# so to keep both the file output and the DB output, list the appenders on one line, e.g. DEBUG, myappender, DB.
log4j.logger.org.example.JavaSparkPi=DEBUG, DB
log4j.additivity.org.example.JavaSparkPi=false
log4j.appender.DB=org.apache.log4j.jdbc.JDBCAppender
log4j.appender.DB.URL=jdbc:mysql://172.17.99.224:3306/test?autoReconnect=true&useSSL=false&useUnicode=true&characterEncoding=UTF-8
log4j.appender.DB.driver=com.mysql.jdbc.Driver
log4j.appender.DB.user=root
log4j.appender.DB.password=pd-1234
log4j.appender.DB.sql=INSERT INTO LOGS VALUES('%X{User}','%d{yyyy-MM-dd HH:mm:ss}','%C','%p','%m')
log4j.appender.DB.layout=org.apache.log4j.PatternLayout
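
For reference, here is a minimal sketch of a LOGS table that lines up with the five values in the INSERT above. The column names and types are assumptions; since the INSERT does not name columns, only the order has to match:

CREATE TABLE LOGS (
  USER_ID    VARCHAR(64),   -- %X{User}, taken from the MDC
  LOG_TIME   VARCHAR(32),   -- %d{yyyy-MM-dd HH:mm:ss}
  CLASS_NAME VARCHAR(255),  -- %C, the fully qualified logging class
  LOG_LEVEL  VARCHAR(16),   -- %p, the log level
  MESSAGE    TEXT           -- %m, the log message
);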

An unsolved problem

I also wanted to write the org.example.JavaSparkPi logs directly into a Hive table, but at runtime Hive turns the INSERT INTO LOGS VALUES('%X{User}','%d{yyyy-MM-dd HH:mm:ss}','%C','%p','%m') statement into a MapReduce job, and that job fails.

log4j.logger.org.example.JavaSparkPi=DEBUG, HIVE
log4j.additivity.org.example.JavaSparkPi=false
log4j.appender.HIVE=org.apache.log4j.jdbc.JDBCAppender
log4j.appender.HIVE.URL=jdbc:hive2://172.17.99.224:10000/test?autoReconnect=true&useSSL=false&useUnicode=true&characterEncoding=UTF-8
log4j.appender.HIVE.driver=org.apache.hive.jdbc.HiveDriver
log4j.appender.HIVE.sql=INSERT INTO LOGS VALUES('%X{User}','%d{yyyy-MM-dd HH:mm:ss}','%C','%p','%m')
log4j.appender.HIVE.layout=org.apache.log4j.PatternLayout

The configuration is shown above, and I still haven't been able to figure out where the problem lies. If anyone knows, please share.
