自定义source 记录读取偏移量
添加依赖
<properties>
<version.flume>1.9.0</version.flume>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-core</artifactId>
<version>${version.flume}</version>
</dependency>
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-configuration</artifactId>
<version>${version.flume}</version>
</dependency>
</dependencies>
自定义source
/**
 * Custom Flume source that tails a single text file line by line and records the read
 * offset in a position file, so that reading resumes where it left off after a restart.
 *
 * <p>Lifecycle as driven by Flume: constructor, then {@link #configure(Context)}, then
 * {@link #start()}, which submits a polling task to a single worker thread. The task
 * forwards every new line to the channel processor.
 *
 * <p>Configuration keys read by {@link #configure(Context)}:
 * <ul>
 *   <li>{@code filePath} - the file to read (required)</li>
 *   <li>{@code charset} - encoding of that file, default {@code UTF-8}</li>
 *   <li>{@code positionFile} - the file the current offset is written to (required)</li>
 *   <li>{@code interval} - milliseconds to wait before checking for new content again,
 *       default {@code 1000}</li>
 * </ul>
 *
 * @author ynz
 * @version 1.0 (2019/5/7)
 */
public class TailFileSource extends AbstractSource implements EventDrivenSource, Configurable {
    private static final Logger logger = LoggerFactory.getLogger(TailFileSource.class);

    private String filePath;
    private String charset;
    private String positionFile;
    private long interval;
    private ExecutorService executor;
    private FileRunnable fileRunnable;

    /**
     * Reads the source settings from the agent configuration and validates them.
     *
     * @param context the configuration properties of this source
     * @throws IllegalArgumentException if {@code filePath} or {@code positionFile} is missing,
     *         {@code interval} is negative, or {@code charset} is not a supported charset
     */
    @Override
    public void configure(Context context) {
        filePath = context.getString("filePath");
        charset = context.getString("charset", "UTF-8");
        positionFile = context.getString("positionFile");
        interval = context.getLong("interval", 1000L);

        // Fail fast with a clear message instead of a NullPointerException later in start().
        if (filePath == null || filePath.trim().isEmpty()) {
            throw new IllegalArgumentException("TailFileSource: property 'filePath' is required");
        }
        if (positionFile == null || positionFile.trim().isEmpty()) {
            throw new IllegalArgumentException("TailFileSource: property 'positionFile' is required");
        }
        if (interval < 0) {
            throw new IllegalArgumentException("TailFileSource: property 'interval' must not be negative: " + interval);
        }
        if (!java.nio.charset.Charset.isSupported(charset)) {
            throw new IllegalArgumentException("TailFileSource: unsupported charset: " + charset);
        }
    }

    /**
     * Creates a single-thread executor, submits the file polling task to it, and then
     * marks the source as started.
     */
    @Override
    public synchronized void start() {
        executor = Executors.newSingleThreadExecutor();
        final ChannelProcessor channelProcessor = getChannelProcessor();
        fileRunnable = new FileRunnable(filePath, charset, positionFile, interval, channelProcessor);
        executor.submit(fileRunnable);
        super.start();
    }

    /**
     * Asks the polling task to finish, waits for the worker thread to terminate, and then
     * marks the source as stopped. If the waiting thread is interrupted, the worker is
     * interrupted as well and the wait is abandoned.
     */
    @Override
    public synchronized void stop() {
        // Both fields are null if stop() is called without a preceding start().
        if (fileRunnable != null) {
            fileRunnable.setFlag(false);
        }
        if (executor != null) {
            executor.shutdown();
            while (!executor.isTerminated()) {
                logger.debug("Waiting for filer exec executor service to stop");
                try {
                    // Re-check every 500 milliseconds.
                    executor.awaitTermination(500, TimeUnit.MILLISECONDS);
                } catch (InterruptedException e) {
                    logger.debug("InterutedExecption while waiting for exec executor service" +
                            " to stop . Just exiting");
                    executor.shutdownNow();
                    // Restore the interrupt flag for the caller and stop waiting.
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
        super.stop();
    }

    /**
     * Polling task: reads one line per iteration starting at the stored offset, forwards it
     * as an event, and persists the new offset. When there is nothing to read, or reading
     * failed, it sleeps for {@code interval} milliseconds before trying again.
     */
    private static class FileRunnable implements Runnable {
        private final String charset;
        private final long interval;
        private final ChannelProcessor channelProcessor;
        private final String filePath;
        private final File posFile;
        private long offset = 0L;
        // Written by the thread calling stop(), read by the worker thread.
        private volatile boolean flag = true;

        /**
         * Runs once, before {@link #run()}. Loads the previously saved offset from the
         * position file, creating the file if it does not exist. If the offset cannot be
         * read or parsed, reading starts from the beginning of the file.
         *
         * @param filePath         path of the file to read
         * @param charset          name of the charset the file is encoded in
         * @param positionFile     path of the file holding the offset
         * @param interval         milliseconds to sleep when no new line is available
         * @param channelProcessor receiver of the events built from the lines
         */
        public FileRunnable(String filePath, String charset, String positionFile, long interval, ChannelProcessor channelProcessor) {
            this.charset = charset;
            this.interval = interval;
            this.channelProcessor = channelProcessor;
            this.filePath = filePath;
            posFile = new File(positionFile);
            try {
                if (!posFile.exists()) {
                    posFile.createNewFile();
                }
                String offsetString = FileUtils.readFileToString(posFile, "UTF-8");
                if (offsetString != null && !offsetString.trim().isEmpty()) {
                    long savedOffset = Long.parseLong(offsetString.trim());
                    // A negative value would make seek() fail on every iteration.
                    offset = Math.max(0L, savedOffset);
                }
            } catch (NumberFormatException e) {
                logger.warn("Position file {} does not contain a valid offset; reading from the beginning", posFile, e);
            } catch (Exception e) {
                // Best effort, as before: the source still starts, but the failure is now visible.
                logger.warn("Could not load offset from position file {}; reading from the beginning", posFile, e);
            }
        }

        /**
         * Loops until {@link #setFlag(boolean)} is called with {@code false} or the thread
         * is interrupted.
         */
        @Override
        public void run() {
            while (flag) {
                boolean idle = true;
                // The file is opened per iteration and closed again by try-with-resources.
                try (RandomAccessFile raf = new RandomAccessFile(filePath, "r")) {
                    if (raf.length() < offset) {
                        // The file is shorter than the saved offset, e.g. it was truncated.
                        logger.warn("File {} is shorter than saved offset {}; reading from the beginning", filePath, offset);
                        offset = 0L;
                    }
                    raf.seek(offset);
                    // NOTE: readLine() also returns an unterminated last line, so a line that is
                    // still being written may be delivered in two pieces.
                    String rawLine = raf.readLine();
                    if (rawLine != null) {
                        // readLine() turns every byte into one char, so ISO-8859-1 gives back the
                        // exact bytes of the file, which are then decoded with the configured charset.
                        String line = new String(rawLine.getBytes("ISO-8859-1"), charset);
                        logger.debug("file content:{}", line);
                        // Event bodies are always emitted as UTF-8, independent of the platform default.
                        channelProcessor.processEvent(EventBuilder.withBody(line.getBytes("UTF-8")));
                        // Advance and persist the offset only after the event was accepted.
                        offset = raf.getFilePointer();
                        FileUtils.writeStringToFile(posFile, Long.toString(offset), "UTF-8");
                        idle = false;
                    }
                } catch (IOException e) {
                    logger.error("read log file error", e);
                } catch (RuntimeException e) {
                    // For example the channel rejected the event. The offset was not advanced, so
                    // the same line is retried instead of the worker thread dying silently.
                    logger.error("Failed to deliver event; will retry", e);
                }
                // Sleep when nothing was read or an error occurred, to avoid a busy loop.
                if (idle && !pause()) {
                    return;
                }
            }
        }

        /**
         * Sleeps for the configured interval.
         *
         * @return {@code false} if the thread was interrupted while sleeping
         */
        private boolean pause() {
            try {
                Thread.sleep(interval);
                return true;
            } catch (InterruptedException e) {
                logger.error("read filethread Interrupted", e);
                Thread.currentThread().interrupt();
                return false;
            }
        }

        /**
         * Sets the run flag.
         *
         * @param flag {@code false} to make {@link #run()} leave its loop
         */
        public void setFlag(boolean flag) {
            this.flag = flag;
        }
    }
}
编写完以后,打成jar包,放到flume的lib下
conf的配置如下
#定义agent名, source、channel、sink的名称
a1.sources = r1
a1.channels = c1
a1.sinks = k1
#具体定义source,这里的type是自定义的source的类的全路径
a1.sources.r1.type = cn.myframe.source.TailFileSource
#这里的参数名都和自定义类中读取的参数名一致
#读取哪个文件
a1.sources.r1.filePath = /usr/local/flume/data2/a.txt
#偏移量保存的文件
a1.sources.r1.positionFile = /usr/local/flume/data2/index
#时间间隔,每隔多久读取一次
a1.sources.r1.interval = 2000
#编码
a1.sources.r1.charset = UTF-8
#具体定义channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
#具体定义sink
a1.sinks.k1.type = file_roll
a1.sinks.k1.sink.directory = /usr/local/flume/data2/sink
#组装source、channel、sink
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
启动
flume-ng agent --conf /usr/local/flume/apache-flume-1.9.0-bin/conf --conf-file /usr/local/flume/apache-flume-1.9.0-bin/conf/tail.conf --name a1 -Dflume.root.logger=INFO,console
实现把文件 a.txt 的内容复制到 /usr/local/flume/data2/sink 目录