Flume: a custom TailFileSource that records its read offset

When Flume is used to collect Nginx log data in real time, a machine crash means the current read position is lost, so data can be lost or re-read after a restart. To avoid this, we implement a custom source that records its read offset.
Here we use a TailFileSource, which tails one file at a time.

The code is as follows (written with reference to the ExecSource source code):

package cn.edu360.flume.source;

import org.apache.commons.io.FileUtils;
import org.apache.flume.Context;
import org.apache.flume.EventDrivenSource;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.source.AbstractSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.Charset;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * Created by zx on xxx.
 * Lifecycle of a Flume source: constructor -> configure -> start -> processor.process
 * 1. Read the configuration (which file to tail, the charset, where to write the offset, and how often to check the file for new content)
 */
public class TailFileSource extends AbstractSource implements EventDrivenSource, Configurable {

    private static final Logger logger = LoggerFactory.getLogger(TailFileSource.class);

    private String filePath;
    private String charset;
    private String posiFile;
    private long interval;
    private ExecutorService executor;
    private FileRunnable fileRunnable;

    @Override
    public void configure(Context context) {
        filePath = context.getString("filePath");
        charset = context.getString("charset", "UTF-8");
        posiFile = context.getString("posiFile");
        interval = context.getLong("interval", 1000L);
    }


    @Override
    public synchronized void start() {
        //create a single-threaded executor
        executor = Executors.newSingleThreadExecutor();
        //the Runnable that does the actual file tailing
        fileRunnable = new FileRunnable(filePath, posiFile, interval, charset, getChannelProcessor());
        //submit the tailing task to the executor
        executor.submit(fileRunnable);
        //call the parent class's start()
        super.start();
    }

    @Override
    public synchronized void stop() {
        fileRunnable.setFlag(false);
        executor.shutdown();
        while (!executor.isTerminated()) {
            logger.debug("Waiting for filer executor service to stop");
            try {
                executor.awaitTermination(500, TimeUnit.MILLISECONDS);
            } catch (InterruptedException e) {
                logger.debug("Interrupted while waiting for exec executor service "
                        + "to stop. Just exiting.");
                Thread.currentThread().interrupt();
            }
        }
        super.stop();
    }


    private static class FileRunnable implements Runnable {

        private long interval;
        private String charset;
        private ChannelProcessor channelProcessor;
        private long offset = 0L;
        private RandomAccessFile raf;
        private boolean flag = true;
        private File positionFile;

        /**
         * @param filePath         path of the log file to tail
         * @param posiFile         path of the file that stores the read offset
         * @param interval         how long to wait (in milliseconds) before polling again when there is no new data
         * @param charset          charset of the log file
         * @param channelProcessor processor used to push events to the channel
         */
        private FileRunnable(String filePath, String posiFile, long interval, String charset, ChannelProcessor channelProcessor) {
            this.interval = interval;
            this.charset = charset;
            this.channelProcessor = channelProcessor;


            //read the saved offset: if one exists, resume from it, otherwise read from the beginning
            positionFile = new File(posiFile);
            if (!positionFile.exists()) {
                //create the position file if it does not exist yet
                try {
                    positionFile.createNewFile();
                } catch (IOException e) {
                    logger.error("create position file error", e);
                }
            }
            //read the offset
            try {
                String offsetString = FileUtils.readFileToString(positionFile);
                //if an offset was recorded before
                if (offsetString != null && !"".equals(offsetString)) {
                    //parse the saved offset as a long
                    offset = Long.parseLong(offsetString);
                }
                //open the log file so that data is read from the saved position
                raf = new RandomAccessFile(filePath, "r");
                //seek to the saved offset
                raf.seek(offset);
            } catch (IOException e) {
                logger.error("read position file error", e);
            }

        }

        @Override
        public void run() {

            while (flag) {
                try {
                    //read a new line from the log file, if there is one
                    String line = raf.readLine();

                    if (line != null) {
                        //RandomAccessFile.readLine decodes bytes as ISO-8859-1, so re-decode with the configured charset
                        line = new String(line.getBytes("ISO-8859-1"), charset);
                        //send the data to the channel
                        channelProcessor.processEvent(EventBuilder.withBody(line, Charset.forName(charset)));
                        //get the latest offset and remember it
                        offset = raf.getFilePointer();
                        //persist the offset to the position file
                        FileUtils.writeStringToFile(positionFile, offset + "");
                    } else {
                        //no new data yet, wait before polling again
                        Thread.sleep(interval);
                    }
                } catch (InterruptedException e) {
                    logger.error("read file thread interrupted", e);
                } catch (IOException e) {
                    logger.error("read log file error", e);
                }

            }
        }

        private void setFlag(boolean flag) {
            this.flag = flag;
        }
    }


}

Then package the project and copy the jar into the lib directory of the Flume installation.
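
A minimal sketch of the build-and-deploy step, assuming a Maven project that declares flume-ng-core and commons-io as dependencies; the artifact name and the $FLUME_HOME path below are placeholders:

# build the jar (the artifact name here is hypothetical)
mvn clean package
# copy it into Flume's lib directory so the agent can load the custom source
cp target/tail-file-source-1.0.jar $FLUME_HOME/lib/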

Let's test it first.

Create a configuration file, a1.conf:

#bin/flume-ng agent -n a1 -f /home/hadoop/a1.conf -c conf -Dflume.root.logger=INFO,console
#define the agent name and the names of the source, channel, and sink
a1.sources = r1
a1.channels = c1
a1.sinks = k1

#configure the source
a1.sources.r1.type = cn.edu360.flume.source.TailFileSource
a1.sources.r1.filePath = /Users/zx/Documents/logs/access.txt
a1.sources.r1.posiFile = /Users/zx/Documents/logs/posi.txt
a1.sources.r1.interval = 2000
a1.sources.r1.charset = UTF-8

#configure the channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

#configure the sink
#file_roll rolls over to a new output file at fixed intervals
a1.sinks.k1.type = file_roll
a1.sinks.k1.sink.directory = /Users/zx/Desktop/k1

#bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

Create a log file named access.txt (the name must match the filePath in the configuration).
Then create a directory named k1 to hold the results (it must also match the sink.directory in the configuration).

Then start Flume with the following command:

bin/flume-ng agent -n a1 -f /home/hadoop/a1.conf -c conf -Dflume.root.logger=INFO,console

-n  the name of the agent
-f  the path to the configuration file
-c  the directory containing Flume's default configuration files
-Dflume.root.logger  sets the log output, which is handy for debugging

Then append some lines to access.txt and check the results:

echo "hello world" >> access.txt
echo "hello jerry" >> access.txt