1.模拟日志数据的产生
# Simulate log production: append one synthetic order record every 0.5 s.
for i in $(seq 1 1000000); do
    echo "${i},lisi,$(date +%s)000,iphone8plus,submit_order" >> a.log
    sleep 0.5
done
数据在不断的产出
同理,在另一台虚拟机上产生数据
2.自定义拦截器
自定义的拦截器 用于记录业务端产生时的时间戳
2.1 在pom文件中添加依赖
<dependencies>
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-core</artifactId>
<version>1.9.0</version>
<scope>provided</scope>
</dependency>
</dependencies>
2.2 写拦截器
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
public class EventTimeStampInterceptor implements Interceptor {
private String splitby;
private Integer ts_index;
public EventTimeStampInterceptor(String splitby, Integer ts_index) {
this.splitby = splitby;
this.ts_index = ts_index;
}
/**
* 初始化方法,在正式调用拦截逻辑之前,会先调用一次
*/
public void initialize() {
}
/**
* 拦截的处理逻辑所在方法
*
* 假设,我们要采集的数据,格式如下:
* id,name,timestamp,devicetype,event
*
* @param event
* @return
*/
public Event intercept(Event event) {
byte[] body = event.getBody();
String line = new String(body);
// 从事件内容中提取事件时间戳
String[] split = line.split(this.splitby);
String timestampStr = split[this.ts_index];
// 将时间戳放入header
event.getHeaders().put("timestamp",timestampStr);
return event;
}
public List<Event> intercept(List<Event> list) {
for (Event event : list) {
intercept(event);
}
return list;
}
/**
* 关闭清理方法,在销毁该拦截器实例之前,会调用一次
*/
public void close() {
}
/**
* builder是用于提供给flume来构建自定义拦截器对象的
*
*/
public static class EventTimeStampInterceptorBuilder implements Interceptor.Builder{
String splitby ;
Integer ts_index;
/**
* flume会调用该方法来创建我们的自定义拦截器对象
* @return
*/
public Interceptor build() {
return new EventTimeStampInterceptor(splitby,ts_index);
}
/**
* flume会将加载的参数,通过该方法传递进来
* @param context
*/
public void configure(Context context) {
splitby = context.getString("split_by",",");
ts_index = context.getInteger("ts_index", 2);
}
}
}
2.3 上游的conf文件
# Upstream agent: tails the local log, stamps each event with its
# business timestamp, and forwards to the downstream agent over Avro.
a1.sources = r1
a1.channels = c1
a1.sinks = k1

a1.sources.r1.channels = c1
a1.sources.r1.type = exec
a1.sources.r1.batchSize = 100
a1.sources.r1.command = tail -F /log/a.log
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = flume.stampInterceptor.EventTimeStampInterceptor$EventTimeStampInterceptorBuilder
a1.sources.r1.interceptors.i1.split_by = ,
a1.sources.r1.interceptors.i1.ts_index = 2

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 200

a1.sinks.k1.channel = c1
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = linux03
a1.sinks.k1.port = 13131
a1.sinks.k1.batch-size = 100
2.4 下游的conf文件
# Downstream agent: receives Avro events and writes them to HDFS,
# partitioned by the event-time "timestamp" header set upstream.
a1.sources = r1
a1.channels = c1
a1.sinks = k1

a1.sources.r1.channels = c1
a1.sources.r1.type = avro
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 13131

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 200

a1.sinks.k1.channel = c1
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://linux01:8020/logs2/%Y-%m-%d/
a1.sinks.k1.hdfs.filePrefix = ABData
a1.sinks.k1.hdfs.fileSuffix = .log
a1.sinks.k1.hdfs.rollInterval = 300
a1.sinks.k1.hdfs.rollSize = 268435456
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.batchSize = 100
# false: use the event-header "timestamp" (set by the interceptor),
# not the local collection time, to resolve %Y-%m-%d in hdfs.path.
a1.sinks.k1.hdfs.useLocalTimeStamp = false
a1.sinks.k1.hdfs.fileType = DataStream
2.5 启动命令
# Launch the agent (long-option form of the same command).
bin/flume-ng agent \
  --conf conf/ \
  --conf-file confss/xiayou.conf \
  --name a1 \
  -Dflume.root.logger=INFO,console
先启动下游,再启动上游