pom文件
<dependencies>
<!-- flume core API (Interceptor, Event, Context).
     NOTE(review): at runtime these classes are supplied by the Flume installation's
     lib directory, so this dependency is a compile-time-only need — consider
     <scope>provided</scope> so it is not bundled twice. -->
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-core</artifactId>
<version>1.5.2</version>
</dependency>
<!-- fastjson, used to parse the Logstash JSON envelope.
     NOTE(review): fastjson 1.2.32 is affected by known deserialization RCE
     advisories; upgrading to a patched 1.2.x release is strongly recommended. -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.32</version>
</dependency>
<!-- logging (logback behind slf4j) -->
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.1.3</version>
</dependency>
<dependency>
<groupId>org.logback-extensions</groupId>
<artifactId>logback-ext-spring</artifactId>
<version>0.1.2</version>
</dependency>
<!-- route commons-logging calls through slf4j -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
<version>1.7.12</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- jar packaging plugin -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.4</version>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
<!-- empty on purpose: the jar is a Flume plugin, not an executable jar -->
<mainClass></mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<!-- compiler plugin: target Java 8 -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>utf-8</encoding>
</configuration>
</plugin>
</plugins>
</build>
代码
package com.flume.interceptor;
import com.alibaba.fastjson.JSONObject;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
/**
* TODO
*
* @version: 1.0
* @author: unique
* @date: 2021/1/9 14:45
* @desc:
*/
public class TestInterceptor implements Interceptor {

    // Kept public static final to preserve the original field's visibility.
    public static final Logger logger = LoggerFactory.getLogger(TestInterceptor.class);

    @Override
    public void initialize() {
        logger.info("----------自定义拦截器的initialize方法执行");
    }

    /**
     * Processes a single event: parses the body as JSON (a Logstash-style
     * envelope), keeps only the "message" payload, and drops events whose
     * message contains "Create".
     *
     * @param event incoming event; body is expected to be UTF-8 JSON text
     * @return the event with its body replaced by the "message" field,
     *         {@code null} to drop the event, or the original event unchanged
     *         when the body is not parseable JSON or lacks a "message" field
     */
    @Override
    public Event intercept(Event event) {
        logger.info("----------intercept(Event event)方法执行,处理单个event");
        // FIX: decode with an explicit charset instead of the platform default.
        String data = new String(event.getBody(), StandardCharsets.UTF_8);
        JSONObject jsonObject;
        try {
            jsonObject = JSONObject.parseObject(data);
        } catch (RuntimeException e) {
            // FIX: a malformed record used to throw out of the interceptor and
            // fail the whole batch; pass it through unchanged instead.
            logger.warn("event body is not valid JSON, passing through unchanged", e);
            return event;
        }
        // FIX: previously threw NullPointerException when the "message" key
        // was absent (or the body parsed to JSON null).
        String message = (jsonObject == null) ? null : jsonObject.getString("message");
        if (message == null) {
            logger.warn("event has no \"message\" field, passing through unchanged");
            return event;
        }
        if (message.contains("Create")) {
            // Filter this event out entirely.
            return null;
        }
        event.setBody(message.getBytes(StandardCharsets.UTF_8));
        return event;
    }

    /**
     * Processes a batch, discarding events filtered out by
     * {@link #intercept(Event)}.
     */
    @Override
    public List<Event> intercept(List<Event> events) {
        logger.info("----------intercept(List<Event> events)方法执行");
        // Presize to the batch size to avoid repeated resizing.
        List<Event> list = new ArrayList<>(events.size());
        for (Event event : events) {
            Event interceptedEvent = intercept(event);
            if (interceptedEvent != null) {
                list.add(interceptedEvent);
            }
        }
        return list;
    }

    @Override
    public void close() {
        logger.info("----------自定义拦截器close方法执行");
    }

    /** Factory referenced from the agent config as ...TestInterceptor$Builder. */
    public static class Builder implements Interceptor.Builder {
        @Override
        public Interceptor build() {
            logger.info("----------build方法执行");
            return new TestInterceptor();
        }

        @Override
        public void configure(Context context) {
            logger.info("----------configure方法执行");
        }
    }
}
将代码打包,然后把打好的 jar 包以及 fastjson 的 jar 包都放到 flume 安装目录下的 lib 目录中
flume 配置 读取kafka数据到hdfs
## Agent components
a1.sources=r1
a1.channels=c1
a1.sinks=k1
## source1: consume from Kafka
a1.sources.r1.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r1.batchSize = 5000
a1.sources.r1.batchDurationMillis = 2000
# NOTE(review): 172.0.0.1 is a routable address, not loopback — confirm this is
# not a typo for 127.0.0.1 (or the broker's real host:port).
a1.sources.r1.kafka.bootstrap.servers = 172.0.0.1:8888
a1.sources.r1.kafka.topics=systemlog
a1.sources.r1.kafka.consumer.group.id=kafka_flume_hdfs_systemLog
# source1 interceptor: strips the Logstash envelope, drops "Create" messages
a1.sources.r1.interceptors=i1
a1.sources.r1.interceptors.i1.type=com.flume.interceptor.TestInterceptor$Builder
## channel1: in-memory channel
a1.channels.c1.type = memory
# channel store size
a1.channels.c1.capacity = 100000
# transaction size
a1.channels.c1.transactionCapacity = 10000
## sink1: write to HDFS
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = /data/log/
a1.sinks.k1.hdfs.filePrefix = log-
a1.sinks.k1.hdfs.fileSuffix = .log
## Avoid producing lots of small files: roll at 64 MB, never by event count.
# FIX(review): rollInterval was set twice (0, then 14400); the second value
# silently overrode the first. Keep a single explicit value: 14400 s = 4 h.
a1.sinks.k1.hdfs.rollInterval = 14400
a1.sinks.k1.hdfs.rollSize = 67108864
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.threadsPoolSize = 30
a1.sinks.k1.hdfs.minBlockReplicas = 1
## Write plain text output rather than SequenceFile
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.writeFormat = Text
## Wiring
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
在flume目录下启动
bin/flume-ng agent -n a1 -c conf -f job/kafka-flume-interceptors-hdfs.conf -Dflume.root.logger=INFO,console
启动过程中
原始数据是 通过logstatsh 采集数据到kafka的
kafka中的数据是这样的
{"@timestamp":"2021-01-08T20:00:02.433Z","path":"/opt/spider/sigar_jar/log/log.log","message":"{\"time\":\"1610136001548\",\"ip\":\"192.168.1.200\",\"mac\":\"****\",\"cpulog\":{\"usage\":\"11\",\"corenum\":\"8\"},\"disklog\":{\"/dev/mapper/centos-root\":\"488209960960,476295784448,0\",\"/dev/sda1\":\"48186880,2359808,0\"},\"memorylog\":{\"used\":\"16045748224\",\"total\":\"16637550592\"},\"internetlog\":{\"rpackets\":\"312097715\",\"tpackets\":\"186182528\",\"rbytes\":\"25964917706\",\"tbytes\":\"102518768869\"},\"vmstat\":{\"us\":\"2\",\"sy\":\"10\"},\"uptime\":\"0.27,0.70,2.24\",\"iostat\":[{\"device\":\"sda\",\"util\":\"1.68\"},{\"device\":\"scd0\",\"util\":\"0.00\"},{\"device\":\"dm-0\",\"util\":\"1.34\"},{\"device\":\"dm-1\",\"util\":\"0.84\"}]}","host":"hdp13","type":"log","@version":"1"}
{"@timestamp":"2021-01-08T20:00:02.433Z","path":"/opt/spider/sigar_jar/log/log.log","message":"{\"time\":\"1610136001882\",\"ip\":\"192.168.1.200\",\"mac\":\"****\",\"cpulog\":{\"usage\":\"4\",\"corenum\":\"8\"},\"disklog\":{\"/dev/mapper/centos-root\":\"488209960960,476298598400,0\",\"/dev/sda1\":\"48186880,2359808,0\"},\"memorylog\":{\"used\":\"16031997952\",\"total\":\"16637550592\"},\"internetlog\":{\"rpackets\":\"312097761\",\"tpackets\":\"186182564\",\"rbytes\":\"25964926803\",\"tbytes\":\"102518774582\"},\"vmstat\":{\"us\":\"2\",\"sy\":\"10\"},\"uptime\":\"0.27,0.70,2.24\",\"iostat\":[{\"device\":\"sda\",\"util\":\"1.68\"},{\"device\":\"scd0\",\"util\":\"0.00\"},{\"device\":\"dm-0\",\"util\":\"1.34\"},{\"device\":\"dm-1\",\"util\":\"0.84\"}]}","host":"hdp13","type":"log","@version":"1"}
到hdfs的数据是这样的
{"time":"1610178255696","ip":"192.168.1.200","mac":"*****","cpulog":{"usage":"0","corenum":"8"},"disklog":{"/dev/mapper/centos-root":"490210693120,486625612800,0","/dev/sda1":"48186880,2380288,0"},"memorylog":{"used":"15882878976","total":"16637550592"},"internetlog":{"rpackets":"315253834","tpackets":"189356169","rbytes":"26328208815","tbytes":"103859485655"},"vmstat":{"us":"2","sy":"9"},"uptime":"0.40,0.95,0.99","iostat":[{"device":"sda","util":"1.69"},{"device":"scd0","util":"0.00"},{"device":"dm-0","util":"1.34"},{"device":"dm-1","util":"0.85"}]}
{"time":"1610178255696","ip":"192.168.1.200","mac":"*****","cpulog":{"usage":"0","corenum":"8"},"disklog":{"/dev/mapper/centos-root":"490210693120,486625612800,0","/dev/sda1":"48186880,2380288,0"},"memorylog":{"used":"15882878976","total":"16637550592"},"internetlog":{"rpackets":"315253834","tpackets":"189356169","rbytes":"26328208815","tbytes":"103859485655"},"vmstat":{"us":"2","sy":"9"},"uptime":"0.40,0.95,0.99","iostat":[{"device":"sda","util":"1.69"},{"device":"scd0","util":"0.00"},{"device":"dm-0","util":"1.34"},{"device":"dm-1","util":"0.85"}]}
简单的测试一下 flume的拦截器,有什么问题还请大佬多多指教