编写java自定义过滤程序
新建一个maven项目
在pom.xml中添加
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-core</artifactId>
<version>1.6.0</version>
</dependency>
新建java程序
package flume;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* @Description 自定义拦截器
* event:header、body
* body内容hello开头 则给当前的event header 打入hello标签
* body内容hi开头 则给当前的event header 打入hi标签
*/
/**
 * Custom Flume interceptor that routes events by body prefix.
 *
 * <p>For each event: if the body starts with "hello" the header {@code type=hello}
 * is set; if it starts with "hi", {@code type=hi}; otherwise {@code type=other}.
 * A multiplexing channel selector can then dispatch on the {@code type} header.
 *
 * <p>NOTE(review): the class name violates UpperCamelCase convention, but it is
 * kept as-is because the Flume config references {@code flume.interceptorDemo$Builder}.
 */
public class interceptorDemo implements Interceptor {

    // Reusable buffer for the batch intercept() overload; allocated in
    // initialize() and cleared before each batch.
    private List<Event> addHeaderEvents = null;

    @Override
    public void initialize() {
        addHeaderEvents = new ArrayList<>();
    }

    /**
     * Tags a single event's headers based on its body prefix.
     *
     * @param event the event to inspect; its header map is mutated in place
     * @return the same event, with a "type" header added
     */
    @Override
    public Event intercept(Event event) {
        Map<String, String> headers = event.getHeaders();
        // Decode with an explicit charset: the no-arg String(byte[]) constructor
        // uses the platform default charset, which varies between hosts.
        String bodyStr = new String(event.getBody(), StandardCharsets.UTF_8);
        if (bodyStr.startsWith("hello")) {
            headers.put("type", "hello");
        } else if (bodyStr.startsWith("hi")) {
            headers.put("type", "hi");
        } else {
            headers.put("type", "other");
        }
        return event;
    }

    /**
     * Tags every event in the batch by delegating to {@link #intercept(Event)}.
     *
     * @param list the incoming batch of events
     * @return the tagged events (backed by an internal reusable buffer)
     */
    @Override
    public List<Event> intercept(List<Event> list) {
        addHeaderEvents.clear();
        for (Event event : list) {
            addHeaderEvents.add(intercept(event));
        }
        return addHeaderEvents;
    }

    @Override
    public void close() {
        addHeaderEvents.clear();
        addHeaderEvents = null;
    }

    /** Builder referenced from the Flume agent config (type=flume.interceptorDemo$Builder). */
    public static class Builder implements Interceptor.Builder {
        @Override
        public Interceptor build() {
            return new interceptorDemo();
        }

        @Override
        public void configure(Context context) {
            // No configuration options are supported.
        }
    }
}
将程序打jar包
将jar包放在flume/lib
文件夹下
在flume中测试jar包
准备测试前配置相关环境
启动Hadoop:start-all.sh
启动zookeeper:/root/software/zkpr/bin/zkServer.sh start
启动kafka(后台)
kafka-server-start.sh -daemon /root/software/kafka/config/server.properties
全部启动完成后,输入jps
如下图所示
接下来
在flume/conf/job
文件夹下,创建vi netcat-flume-interceptor.conf
配置文件
interceptordemo.sources=isource
interceptordemo.channels=hellochannel hichannel otherchannel
interceptordemo.sinks=hellosink hisink othersink
interceptordemo.sources.isource.type=netcat
interceptordemo.sources.isource.bind=localhost
interceptordemo.sources.isource.port=55555
interceptordemo.sources.isource.interceptors=interceptor1
interceptordemo.sources.isource.interceptors.interceptor1.type=flume.interceptorDemo$Builder
interceptordemo.sources.isource.selector.type=multiplexing
interceptordemo.sources.isource.selector.mapping.hello=hellochannel
interceptordemo.sources.isource.selector.mapping.hi=hichannel
interceptordemo.sources.isource.selector.mapping.other=otherchannel
interceptordemo.sources.isource.selector.header=type
interceptordemo.channels.hellochannel.type=memory
interceptordemo.channels.hellochannel.capacity=1000
interceptordemo.channels.hellochannel.transactionCapacity=100
interceptordemo.channels.hichannel.type=memory
interceptordemo.channels.hichannel.capacity=1000
interceptordemo.channels.hichannel.transactionCapacity=100
interceptordemo.channels.otherchannel.type=memory
interceptordemo.channels.otherchannel.capacity=1000
interceptordemo.channels.otherchannel.transactionCapacity=100
interceptordemo.sinks.hellosink.type=hdfs
interceptordemo.sinks.hellosink.hdfs.fileType=DataStream
interceptordemo.sinks.hellosink.hdfs.filePrefix=hello
interceptordemo.sinks.hellosink.hdfs.fileSuffix=.csv
interceptordemo.sinks.hellosink.hdfs.path=hdfs://192.168.150.100:9000/11/hello/%Y-%m-%d
interceptordemo.sinks.hellosink.hdfs.useLocalTimeStamp=true
interceptordemo.sinks.hellosink.hdfs.batchSize=640
interceptordemo.sinks.hellosink.hdfs.rollCount=0
interceptordemo.sinks.hellosink.hdfs.rollSize=6400000
interceptordemo.sinks.hellosink.hdfs.rollInterval=3
interceptordemo.sinks.hisink.type=org.apache.flume.sink.kafka.KafkaSink
interceptordemo.sinks.hisink.batchSize=640
interceptordemo.sinks.hisink.brokerList=192.168.150.100:9092
interceptordemo.sinks.hisink.topic=hi
interceptordemo.sinks.othersink.type=logger
interceptordemo.sources.isource.channels=hellochannel hichannel otherchannel
interceptordemo.sinks.hellosink.channel=hellochannel
interceptordemo.sinks.hisink.channel=hichannel
interceptordemo.sinks.othersink.channel=otherchannel
启动测试
新建一个窗口输入flume监视命令
flume监视命令
/root/software/flume/bin/flume-ng agent --name interceptordemo --conf /root/software/flume/conf --conf-file /root/software/flume/conf/job/netcat-flume-interceptor.conf -Dflume.root.logger=INFO,console
netcat-flume-interceptor.conf 为配置文件
interceptordemo 为配置文件中的agent名
新建窗口创建topic用来消费消息
创建名为 hi 的topic
kafka-topics.sh --zookeeper 192.168.150.100:2181 --create --topic hi --partitions 1 --replication-factor 1
启动kafka消费消息
kafka-console-consumer.sh --topic hi --bootstrap-server 192.168.150.100:9092 --from-beginning
新建窗口用来输入命令
telnet localhost 55555
效果:在telnet localhost 55555
中输入hi
,数据将会被送到kafka中名为hi
的topic中
输入hello
,数据将会写入到HDFS中,路径为hdfs://192.168.150.100:9000/11/hello