flume 1.8 file_roll相关参数配置
把 Event 存储到本地文件系统。
属性 | 默认值 | 解释 |
---|---|---|
channel | – | 与 Sink 绑定的 channel |
type | – | 组件类型,这个是:file_roll |
sink.directory | – | Event 将要保存的目录 |
sink.pathManager | DEFAULT | 配置使用哪个路径管理器,这个管理器的作用是按照规则生成新的存储文件名称,可选值有: default规则:prefix+当前毫秒值+“-”+文件序号+“.”+extension; rolltime规则:prefix+yyyyMMddHHmmss+“-”+文件序号+“.”+extension; 注:prefix 和 extension 如果没有配置则不会附带 |
sink.pathManager.extension | – | 如果上面的 pathManager 使用默认的话,可以用这个属性配置存储文件的扩展名 |
sink.pathManager.prefix | – | 如果上面的 pathManager 使用默认的话,可以用这个属性配置存储文件的文件名的固定前缀 |
sink.rollInterval | 30 | 表示每隔30秒创建一个新文件进行存储。如果设置为0,表示所有 Event 都会写到一个文件中。 |
sink.serializer | TEXT | 配置 Event 序列化器,可选值有:TEXT、avro_event,或者自定义的实现了 EventSerializer.Builder 接口的类的全限定类名 |
batchSize | 100 | 每次请求批处理的 Event 数 |
配置范例:
a1.channels = c1
a1.sinks = k1
a1.sinks.k1.type = file_roll
a1.sinks.k1.channel = c1
a1.sinks.k1.sink.directory = /var/log/flume
二次开发
因项目要求:
1、根据时间动态生成日志目录:/logs/yyyyMM/yyyyMMdd/HH
2、文件名随时间动态生成:a_yyyyMMddHH_LOG-11001-UserStatusChange_00_001.dat ——yyyyMMddHH为年月日小时
现有配置无法满足要求,所以对RollingFileSink进行二次开发,代码如下:
package com.demo.flume.sink;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.formatter.output.PathManager;
import org.apache.flume.formatter.output.PathManagerFactory;
import org.apache.flume.instrumentation.SinkCounter;
import org.apache.flume.lifecycle.LifecycleState;
import org.apache.flume.serialization.EventSerializer;
import org.apache.flume.serialization.EventSerializerFactory;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
/**
 * A customized variant of Flume's RollingFileSink that, per project requirements:
 * <ol>
 *   <li>creates time-based directory trees under the base directory, e.g.
 *       {@code /logs/yyyyMM/yyyyMMdd/HH} (driven by {@code sink.dir.timeFormat});</li>
 *   <li>names files as {@code <prefix><time><suffix>}, where the time part is
 *       formatted by {@code sink.file.name.timeFormat}.</li>
 * </ol>
 * Events are appended to the file matching the current wall-clock time, so the
 * output file switches automatically when the formatted time value changes.
 *
 * <p>Thread-safety: Flume invokes {@link #process()} from a single SinkRunner
 * thread, so the non-thread-safe {@link SimpleDateFormat} instances are safe here.
 */
public class RollingFileSink2 extends AbstractSink implements Configurable {

    private static final Logger logger =
            LoggerFactory.getLogger(RollingFileSink2.class);

    private static final long defaultRollInterval = 30;
    private static final int defaultBatchSize = 100;

    private int batchSize = defaultBatchSize;
    private File directory;
    private long rollInterval;
    private OutputStream outputStream;
    private ScheduledExecutorService rollService;
    private String serializerType;
    private Context serializerContext;
    private String fileNameTimeFormatStr;
    // null when sink.file.name.timeFormat is not configured (file name then has no time part)
    private SimpleDateFormat fileNameTimeFormat;
    private String dirTimeFormatStr;
    // One formatter per '/'-separated segment of sink.dir.timeFormat.
    // Always non-null after configure() — process() iterates it unconditionally.
    private List<SimpleDateFormat> sfList;
    private String filePrefix;  // fixed file-name prefix
    private String fileSuffix;  // fixed file-name suffix
    private EventSerializer serializer;
    private SinkCounter sinkCounter;
    private PathManager pathController;
    private String dir;  // base directory; normalized in configure() to end with '/'
    // Set by the roll timer. NOTE(review): process() closes and reopens the stream
    // on every batch and never reads this flag, so the timer is effectively inert;
    // kept for compatibility with the upstream RollingFileSink structure.
    private volatile boolean shouldRotate;

    public RollingFileSink2() {
        shouldRotate = false;
    }

    /**
     * Reads sink configuration. Recognized keys (all under the {@code sink.} scope
     * except {@code batchSize}): pathManager, directory, rollInterval, serializer,
     * batchSize, dir.timeFormat, file.name.timeFormat, prefix, suffix.
     *
     * @throws IllegalArgumentException if {@code sink.directory} is missing
     */
    @Override
    public void configure(Context context) {
        String pathManagerType = context.getString("sink.pathManager", "DEFAULT");
        dir = context.getString("sink.directory");
        String rollIntervalStr = context.getString("sink.rollInterval");
        serializerType = context.getString("sink.serializer", "TEXT");
        serializerContext =
                new Context(context.getSubProperties("sink." +
                        EventSerializer.CTX_PREFIX));
        Context pathManagerContext =
                new Context(context.getSubProperties("sink." +
                        PathManager.CTX_PREFIX));
        pathController = PathManagerFactory.getInstance(pathManagerType, pathManagerContext);

        Preconditions.checkArgument(dir != null, "Directory may not be null");
        Preconditions.checkNotNull(serializerType, "Serializer type is undefined");

        // Normalize the base directory so path concatenation in process() is safe.
        dir = dir.endsWith("/") ? dir : dir + "/";

        if (rollIntervalStr == null) {
            this.rollInterval = defaultRollInterval;
        } else {
            this.rollInterval = Long.parseLong(rollIntervalStr);
        }
        batchSize = context.getInteger("sink.batchSize", defaultBatchSize);

        // Directory time format, e.g. "yyyyMM/yyyyMMdd/HH" — one formatter per segment.
        // Always initialize sfList: process() iterates it even when no format is set
        // (the original code left it null in that case, causing an NPE).
        dirTimeFormatStr = context.getString("sink.dir.timeFormat", "");
        sfList = new ArrayList<>();
        if (!dirTimeFormatStr.isEmpty()) {  // content comparison, not the broken != ""
            // No -1 limit: drop trailing empty segments so a format ending in '/'
            // does not produce a SimpleDateFormat("") and a double slash in paths.
            for (String segment : dirTimeFormatStr.split("/")) {
                sfList.add(new SimpleDateFormat(segment));
            }
            logger.info("sfList.size={}", sfList.size());
        }

        // File-name time format, e.g. "yyyyMMddHH".
        fileNameTimeFormatStr = context.getString("sink.file.name.timeFormat", "");
        if (!fileNameTimeFormatStr.isEmpty()) {
            fileNameTimeFormat = new SimpleDateFormat(fileNameTimeFormatStr);
        }
        filePrefix = context.getString("sink.prefix", "");
        fileSuffix = context.getString("sink.suffix", "");
        this.directory = new File(dir);

        if (sinkCounter == null) {
            sinkCounter = new SinkCounter(getName());
        }
    }

    @Override
    public void start() {
        logger.info("Starting {}...", this);
        sinkCounter.start();
        super.start();
        pathController.setBaseDirectory(directory);
        if (rollInterval > 0) {
            rollService = Executors.newScheduledThreadPool(
                    1,
                    new ThreadFactoryBuilder().setNameFormat(
                            "rollingFileSink-roller-" +
                                    Thread.currentThread().getId() + "-%d").build());
            /*
             * Every N seconds, mark that it's time to rotate. We purposefully do NOT
             * touch anything other than the indicator flag to avoid error handling
             * issues (e.g. IO exceptions occurring in two different threads).
             * Resist the urge to actually perform rotation in a separate thread!
             */
            rollService.scheduleAtFixedRate(new Runnable() {
                @Override
                public void run() {
                    logger.debug("Marking time to rotate file {}",
                            pathController.getCurrentFile());
                    shouldRotate = true;
                }
            }, rollInterval, rollInterval, TimeUnit.SECONDS);
        } else {
            logger.info("RollInterval is not valid, file rolling will not happen.");
        }
        logger.info("RollingFileSink {} started.", getName());
    }

    /**
     * Drains up to {@code batchSize} events from the channel into the file for the
     * current timestamp, creating the time-based directory tree as needed. The
     * output stream is closed after every batch so a new wall-clock period starts
     * a new file on the next call.
     *
     * @return READY if a full batch was written, BACKOFF if the channel ran dry
     * @throws EventDeliveryException on any I/O or channel failure (transaction rolled back)
     */
    @Override
    public Status process() throws EventDeliveryException {
        Date date = new Date();

        // Build and create the time-based directory, e.g. <base>/yyyyMM/yyyyMMdd/HH/.
        StringBuilder dirName = new StringBuilder(dir);
        for (SimpleDateFormat sf : sfList) {
            dirName.append(sf.format(date)).append("/");
        }
        File file = new File(dirName.toString());
        if (!file.exists()) {
            if (file.mkdirs()) {
                logger.info("create dir {}", dirName);
            } else if (!file.exists()) {
                // Another thread/process may have created it concurrently; only warn
                // if it is genuinely still missing.
                logger.warn("Failed to create dir {}", dirName);
            }
        }

        // Compose the target file name: <prefix><formatted time><suffix>.
        String fileNameTime = "";
        if (fileNameTimeFormat != null) {
            fileNameTime = fileNameTimeFormat.format(date);
        }
        String fileName = filePrefix + fileNameTime + fileSuffix;
        File currentFile = new File(dirName + fileName);
        if (!currentFile.exists()) {
            try {
                logger.info("create file {}", currentFile);  // was logging the dir name
                if (!currentFile.createNewFile() && !currentFile.exists()) {
                    logger.warn("Failed to create file {}", currentFile);
                }
            } catch (IOException e) {
                // Log instead of swallowing (original used printStackTrace); the
                // stream open below will surface a hard failure as EventDeliveryException.
                logger.error("Unable to create file " + currentFile, e);
            }
        }

        if (outputStream == null) {
            logger.debug("Opening output stream for file {}", currentFile);
            try {
                // true = append, so repeated batches within the same period
                // accumulate in the same file.
                outputStream = new BufferedOutputStream(
                        new FileOutputStream(currentFile, true));
                serializer = EventSerializerFactory.getInstance(
                        serializerType, serializerContext, outputStream);
                serializer.afterCreate();
                sinkCounter.incrementConnectionCreatedCount();
            } catch (IOException e) {
                sinkCounter.incrementConnectionFailedCount();
                throw new EventDeliveryException("Failed to open file "
                        + currentFile + " while delivering event", e);
            }
        }

        Channel channel = getChannel();
        Transaction transaction = channel.getTransaction();
        Event event = null;
        Status result = Status.READY;

        try {
            transaction.begin();
            int eventAttemptCounter = 0;
            for (int i = 0; i < batchSize; i++) {
                event = channel.take();
                if (event != null) {
                    sinkCounter.incrementEventDrainAttemptCount();
                    eventAttemptCounter++;
                    serializer.write(event);
                } else {
                    // No events found, request back-off semantics from runner.
                    result = Status.BACKOFF;
                    break;
                }
            }
            serializer.flush();
            outputStream.flush();
            transaction.commit();
            sinkCounter.addToEventDrainSuccessCount(eventAttemptCounter);

            // Close the stream after every batch so that when the formatted time
            // changes, the next batch opens a new file instead of appending forever.
            if (outputStream != null) {
                logger.debug("Closing file {}", currentFile);
                try {
                    serializer.beforeClose();
                    outputStream.close();
                    sinkCounter.incrementConnectionClosedCount();
                } catch (IOException e) {
                    sinkCounter.incrementConnectionFailedCount();
                    throw new EventDeliveryException("Unable to rotate file "
                            + currentFile + " while delivering event", e);
                } finally {
                    serializer = null;
                    outputStream = null;
                }
            }
        } catch (Exception ex) {
            transaction.rollback();
            throw new EventDeliveryException("Failed to process transaction", ex);
        } finally {
            transaction.close();
        }

        return result;
    }

    @Override
    public void stop() {
        logger.info("RollingFile sink {} stopping...", getName());
        sinkCounter.stop();
        super.stop();

        // Flush and close any stream left open (process() normally closes per batch).
        if (outputStream != null) {
            logger.debug("Closing file {}", pathController.getCurrentFile());
            try {
                serializer.flush();
                serializer.beforeClose();
                outputStream.close();
                sinkCounter.incrementConnectionClosedCount();
            } catch (IOException e) {
                sinkCounter.incrementConnectionFailedCount();
                logger.error("Unable to close output stream. Exception follows.", e);
            } finally {
                outputStream = null;
                serializer = null;
            }
        }

        if (rollInterval > 0) {
            rollService.shutdown();
            // Loop (rather than re-interrupting) so shutdown always completes;
            // re-asserting the interrupt here would spin awaitTermination forever.
            while (!rollService.isTerminated()) {
                try {
                    rollService.awaitTermination(1, TimeUnit.SECONDS);
                } catch (InterruptedException e) {
                    logger.debug("Interrupted while waiting for roll service to stop. " +
                            "Please report this.", e);
                }
            }
        }
        logger.info("RollingFile sink {} stopped. Event metrics: {}",
                getName(), sinkCounter);
    }

    public File getDirectory() {
        return directory;
    }

    public void setDirectory(File directory) {
        this.directory = directory;
    }

    public long getRollInterval() {
        return rollInterval;
    }

    public void setRollInterval(long rollInterval) {
        this.rollInterval = rollInterval;
    }
}
打jar包放入flume的lib目录下
配置如下:
a1.sinks.user_k1.type = com.demo.flume.sink.RollingFileSink2
a1.sinks.user_k1.channel = user_c1
a1.sinks.user_k1.sink.directory = /opt/flume/test/logdata/userLog
a1.sinks.user_k1.batchSize = 10000
a1.sinks.user_k1.sink.rollInterval = 0
a1.sinks.user_k1.sink.dir.timeFormat = yyyyMM/yyyyMMdd/HH
a1.sinks.user_k1.sink.file.name.timeFormat = yyyyMMddHH
a1.sinks.user_k1.sink.prefix = a_
a1.sinks.user_k1.sink.suffix = _LOG-11001-UserStatusChange_00_001.dat