RollingFileSink二次开发

flume 1.8 file_roll相关参数配置

把 Event 存储到本地文件系统。 

属性

默认值

解释

channel

与 Sink 绑定的 channel

type

组件类型,这个是: file_roll.

sink.directory

Event 将要保存的目录

sink.pathManager

DEFAULT

配置使用哪个路径管理器,这个管理器的作用是按照规则生成新的存储文件名称,可选值有: default 、 rolltime

default规则:prefix+当前毫秒值+“-”+文件序号+“.”+extension;

rolltime规则:prefix+yyyyMMddHHmmss+“-”+文件序号+“.”+extension;

注:prefix 和 extension 如果没有配置则不会附带

sink.pathManager.extension

如果上面的 pathManager 使用默认的话,可以用这个属性配置存储文件的扩展名

sink.pathManager.prefix

如果上面的 pathManager 使用默认的话,可以用这个属性配置存储文件的文件名的固定前缀

sink.rollInterval

30

表示每隔30秒创建一个新文件进行存储。如果设置为0,表示所有 Event 都会写到一个文件中。

sink.serializer

TEXT

配置 Event 序列化器,可选值有:text 、 header_and_text 、 avro_event 或者自定义实现了 EventSerializer.Builder 接口的序列化器的全限定类名。 text 只会把 Event 的 body 的文本内容序列化; header_and_text 会把 header 和 body 内容都序列化。

sink.batchSize

100

每次请求批处理的 Event 数

配置范例:

a1.channels = c1
a1.sinks = k1
a1.sinks.k1.type = file_roll
a1.sinks.k1.channel = c1
a1.sinks.k1.sink.directory = /var/log/flume

 

二次开发

 

因项目要求:

1、根据时间动态生成日志目录:/logs/yyyyMM/yyyyMMdd/HH

2、文件名随时间动态生成:a_yyyyMMddHH_LOG-11001-UserStatusChange_00_001.dat        ——yyyyMMddHH为年月日小时

 

现有配置无法满足要求,所以对RollingFileSink进行二次开发,代码如下:

package com.demo.flume.sink;

import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.formatter.output.PathManager;
import org.apache.flume.formatter.output.PathManagerFactory;
import org.apache.flume.instrumentation.SinkCounter;
import org.apache.flume.lifecycle.LifecycleState;
import org.apache.flume.serialization.EventSerializer;
import org.apache.flume.serialization.EventSerializerFactory;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;


/**
 * File sink that writes events into a time-derived directory and file name.
 *
 * <p>For every call to {@link #process()} the sink formats the current time with
 * the configured patterns to build
 * {@code <sink.directory>/<dir pattern segments>/<prefix><file time><suffix>},
 * opens that file in append mode, drains up to {@code sink.batchSize} events
 * from the channel into it, and closes the file again. Because the target path
 * is recomputed on every call, output moves to a new directory/file as soon as
 * the formatted time changes.
 *
 * <p>Configuration keys read by {@link #configure(Context)}:
 * <ul>
 *   <li>{@code sink.directory} (required) - base output directory</li>
 *   <li>{@code sink.dir.timeFormat} - {@code /}-separated {@link SimpleDateFormat}
 *       patterns, one sub-directory level per segment (e.g. {@code yyyyMM/yyyyMMdd/HH})</li>
 *   <li>{@code sink.file.name.timeFormat} - pattern for the time part of the file name</li>
 *   <li>{@code sink.prefix} / {@code sink.suffix} - fixed text before/after the time part</li>
 *   <li>{@code sink.batchSize} (default 100), {@code sink.rollInterval} (default 30),
 *       {@code sink.serializer} (default TEXT), {@code sink.pathManager} (default DEFAULT)</li>
 * </ul>
 *
 * <p>NOTE(review): the {@link SimpleDateFormat} instances are shared fields and are
 * not thread-safe; this class assumes {@code process()} is never called concurrently.
 */
public class RollingFileSink2 extends AbstractSink implements Configurable {
    private static final Logger logger = LoggerFactory
            .getLogger(RollingFileSink2.class);
    private static final long defaultRollInterval = 30;
    private static final int defaultBatchSize = 100;

    private int batchSize = defaultBatchSize;

    private File directory;
    private long rollInterval;
    private OutputStream outputStream;
    private ScheduledExecutorService rollService;

    private String serializerType;
    private Context serializerContext;

    private String fileNameTimeFormatStr;
    private SimpleDateFormat fileNameTimeFormat;
    private String dirTimeFormatStr;
    // Never null: an empty list simply means "no time-based sub-directories".
    private List<SimpleDateFormat> sfList = new ArrayList<>();
    private String filePrefix; // fixed text placed before the time part of the file name
    private String fileSuffix; // fixed text placed after the time part of the file name

    private EventSerializer serializer;

    private SinkCounter sinkCounter;

    private PathManager pathController;
    private String dir;
    // Set by the roll timer started in start(); process() in this class does not read it.
    private volatile boolean shouldRotate;
    // File behind the currently open outputStream; used for log messages only.
    private File openFile;

    public RollingFileSink2() {
        shouldRotate = false;
    }

    /**
     * Reads the sink settings from the Flume context.
     *
     * @param context sink configuration
     * @throws IllegalArgumentException if {@code sink.directory} is missing or a
     *         date pattern is not a valid {@link SimpleDateFormat} pattern
     * @throws NumberFormatException if {@code sink.rollInterval} is not a number
     */
    @Override
    public void configure(Context context) {

        String pathManagerType = context.getString("sink.pathManager", "DEFAULT");
        dir = context.getString("sink.directory");
        String rollInterval = context.getString("sink.rollInterval");

        serializerType = context.getString("sink.serializer", "TEXT");
        serializerContext =
                new Context(context.getSubProperties("sink." +
                        EventSerializer.CTX_PREFIX));

        Context pathManagerContext =
                new Context(context.getSubProperties("sink." +
                        PathManager.CTX_PREFIX));
        pathController = PathManagerFactory.getInstance(pathManagerType, pathManagerContext);

        Preconditions.checkArgument(dir != null, "Directory may not be null");
        Preconditions.checkNotNull(serializerType, "Serializer type is undefined");

        // Make sure the base directory ends with a separator so segments can be appended.
        if (!dir.isEmpty() && !dir.endsWith("/")) {
            dir = dir + "/";
        }

        if (rollInterval == null) {
            this.rollInterval = defaultRollInterval;
        } else {
            this.rollInterval = Long.parseLong(rollInterval);
        }

        batchSize = context.getInteger("sink.batchSize", defaultBatchSize);

        // Directory time patterns: one SimpleDateFormat per path segment.
        dirTimeFormatStr = context.getString("sink.dir.timeFormat", "");
        List<SimpleDateFormat> dirFormats = new ArrayList<>();
        if (!dirTimeFormatStr.isEmpty()) {
            for (String segment : dirTimeFormatStr.split("/")) {
                // Skip empty segments (leading, trailing or doubled '/') so the
                // resulting path never contains "//".
                if (!segment.isEmpty()) {
                    dirFormats.add(new SimpleDateFormat(segment));
                }
            }
            logger.info("sfList.size={}", dirFormats.size());
        }
        sfList = dirFormats;

        // File name time pattern; stays null when not configured.
        fileNameTimeFormatStr = context.getString("sink.file.name.timeFormat", "");
        fileNameTimeFormat = fileNameTimeFormatStr.isEmpty()
                ? null
                : new SimpleDateFormat(fileNameTimeFormatStr);

        filePrefix = context.getString("sink.prefix", "");
        fileSuffix = context.getString("sink.suffix", "");

        this.directory = new File(dir);
        if (sinkCounter == null) {
            sinkCounter = new SinkCounter(getName());
        }

    }


    /**
     * Starts the counter and, when {@code rollInterval > 0}, a single-threaded
     * timer that periodically sets the {@code shouldRotate} flag.
     */
    @Override
    public void start() {
        logger.info("Starting {}...", this);

        sinkCounter.start();
        super.start();

        pathController.setBaseDirectory(directory);
        if (rollInterval > 0) {

            rollService = Executors.newScheduledThreadPool(
                    1,
                    new ThreadFactoryBuilder().setNameFormat(
                            "rollingFileSink-roller-" +
                                    Thread.currentThread().getId() + "-%d").build());

            /*
             * Every N seconds, mark that it's time to rotate. We purposefully do NOT
             * touch anything other than the indicator flag to avoid error handling
             * issues (e.g. IO exceptions occurring in two different threads).
             * Resist the urge to actually perform rotation in a separate thread!
             */
            rollService.scheduleAtFixedRate(new Runnable() {

                @Override
                public void run() {
                    logger.debug("Marking time to rotate file {}",
                            pathController.getCurrentFile());
                    shouldRotate = true;
                }

            }, rollInterval, rollInterval, TimeUnit.SECONDS);
        } else {
            logger.info("RollInterval is not valid, file rolling will not happen.");
        }
        logger.info("RollingFileSink {} started.", getName());
    }


    /**
     * Drains up to {@code batchSize} events from the channel into the file whose
     * path is derived from the current time.
     *
     * <p>The file is flushed and closed <em>before</em> the transaction is
     * committed, so a close failure rolls the transaction back instead of being
     * reported after the commit. The stream is always released before this
     * method returns, on success and on failure. Events already written when a
     * failure occurs stay in the file; after the rollback they may be delivered
     * (and written) again.
     *
     * @return {@code BACKOFF} when the channel ran empty before the batch was
     *         full, otherwise {@code READY}
     * @throws EventDeliveryException if the directory or file cannot be prepared,
     *         or if taking, writing, closing or committing fails
     */
    @Override
    public Status process() throws EventDeliveryException {
        Date date = new Date();

        // Resolve (and create if needed) the time-based output directory.
        String dirName = buildDirectoryName(date);
        File targetDir = new File(dirName);
        if (!targetDir.exists()) {
            // mkdirs() also returns false when another thread/process created the
            // directory in the meantime, hence the isDirectory() re-check.
            if (!targetDir.mkdirs() && !targetDir.isDirectory()) {
                throw new EventDeliveryException("Failed to create directory " + dirName);
            }
            logger.info("create dir {}", dirName);
        }

        // Resolve the output file; opening it in append mode creates it if missing.
        String fileNameTime = "";
        if (fileNameTimeFormat != null) {
            fileNameTime = fileNameTimeFormat.format(date);
        }
        File currentFile = new File(dirName + filePrefix + fileNameTime + fileSuffix);
        if (!currentFile.exists()) {
            logger.info("create file {}", currentFile);
        }

        // A stream left over from an earlier call may point at a different file.
        closeOutputStreamQuietly();
        openOutputStream(currentFile);

        Status result = Status.READY;
        try {
            Channel channel = getChannel();
            Transaction transaction = channel.getTransaction();
            try {
                transaction.begin();
                int eventAttemptCounter = 0;
                for (int i = 0; i < batchSize; i++) {
                    Event event = channel.take();
                    if (event == null) {
                        // No events found, request back-off semantics from runner
                        result = Status.BACKOFF;
                        break;
                    }
                    sinkCounter.incrementEventDrainAttemptCount();
                    eventAttemptCounter++;
                    serializer.write(event);
                }
                // Flush and close first: the data must be safely handed to the
                // file before the channel is told the events are delivered.
                closeOutputStream();
                transaction.commit();
                sinkCounter.addToEventDrainSuccessCount(eventAttemptCounter);
            } catch (Exception ex) {
                transaction.rollback();
                throw new EventDeliveryException("Failed to process transaction", ex);
            } finally {
                transaction.close();
            }
        } finally {
            // No-op on the success path; releases the file handle on failure.
            closeOutputStreamQuietly();
        }

        return result;
    }

    /**
     * Stops the counter, closes any open file and shuts down the roll timer.
     */
    @Override
    public void stop() {
        logger.info("RollingFile sink {} stopping...", getName());
        sinkCounter.stop();
        super.stop();

        closeOutputStreamQuietly();

        // Guard on the service itself: it only exists if start() created it.
        if (rollService != null) {
            rollService.shutdown();

            while (!rollService.isTerminated()) {
                try {
                    rollService.awaitTermination(1, TimeUnit.SECONDS);
                } catch (InterruptedException e) {
                    logger.debug("Interrupted while waiting for roll service to stop. " +
                            "Please report this.", e);
                }
            }
            rollService = null;
        }
        logger.info("RollingFile sink {} stopped. Event metrics: {}",
                getName(), sinkCounter);
    }

    /** Returns the base directory followed by one formatted segment per directory pattern. */
    private String buildDirectoryName(Date date) {
        StringBuilder dirName = new StringBuilder(dir);
        for (SimpleDateFormat sf : sfList) {
            dirName.append(sf.format(date)).append('/');
        }
        return dirName.toString();
    }

    /**
     * Opens {@code target} in append mode and creates the serializer on top of it.
     * The fields are only assigned once everything succeeded; on any failure the
     * raw stream is closed again so no file handle leaks.
     */
    private void openOutputStream(File target) throws EventDeliveryException {
        logger.debug("Opening output stream for file {}", target);
        OutputStream stream = null;
        boolean opened = false;
        try {
            // 'true' = append to an existing file instead of truncating it.
            stream = new BufferedOutputStream(new FileOutputStream(target, true));
            EventSerializer created = EventSerializerFactory.getInstance(
                    serializerType, serializerContext, stream);
            created.afterCreate();
            outputStream = stream;
            serializer = created;
            openFile = target;
            opened = true;
            sinkCounter.incrementConnectionCreatedCount();
        } catch (IOException e) {
            sinkCounter.incrementConnectionFailedCount();
            throw new EventDeliveryException("Failed to open file "
                    + target + " while delivering event", e);
        } finally {
            if (!opened && stream != null) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // The original failure is already propagating; nothing more to do.
                }
            }
        }
    }

    /**
     * Flushes the serializer and closes the current stream, if any. The fields
     * are cleared up front so a failure can never leave a half-closed stream
     * behind for the next call.
     */
    private void closeOutputStream() throws IOException {
        if (outputStream == null) {
            return;
        }
        logger.debug("Closing file {}", openFile);
        OutputStream stream = outputStream;
        EventSerializer current = serializer;
        outputStream = null;
        serializer = null;
        openFile = null;
        try {
            try {
                if (current != null) {
                    current.flush();
                    current.beforeClose();
                }
            } finally {
                // Close the stream even if the serializer failed.
                stream.close();
            }
            sinkCounter.incrementConnectionClosedCount();
        } catch (IOException e) {
            sinkCounter.incrementConnectionFailedCount();
            throw e;
        }
    }

    /** Same as {@link #closeOutputStream()} but logs instead of throwing. */
    private void closeOutputStreamQuietly() {
        try {
            closeOutputStream();
        } catch (IOException e) {
            logger.error("Unable to close output stream. Exception follows.", e);
        }
    }

    /** @return the base output directory set by configuration or {@link #setDirectory(File)} */
    public File getDirectory() {
        return directory;
    }

    /** @param directory base directory handed to the path manager in {@link #start()} */
    public void setDirectory(File directory) {
        this.directory = directory;
    }

    /** @return the roll timer interval in seconds */
    public long getRollInterval() {
        return rollInterval;
    }

    /** @param rollInterval roll timer interval in seconds; values {@code <= 0} disable the timer */
    public void setRollInterval(long rollInterval) {
        this.rollInterval = rollInterval;
    }
}

打jar包放入flume的lib目录下

 

配置如下:

a1.sinks.user_k1.type = com.demo.flume.sink.RollingFileSink2
a1.sinks.user_k1.channel = user_c1
a1.sinks.user_k1.sink.directory = /opt/flume/test/logdata/userLog
a1.sinks.user_k1.sink.batchSize = 10000
a1.sinks.user_k1.sink.rollInterval = 0
a1.sinks.user_k1.sink.dir.timeFormat = yyyyMM/yyyyMMdd/HH
a1.sinks.user_k1.sink.file.name.timeFormat = yyyyMMddHH
a1.sinks.user_k1.sink.prefix = a_
a1.sinks.user_k1.sink.suffix = _LOG-11001-UserStatusChange_00_001.dat

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值