flume的几种简单拦截器记录

## Follow a log file as it grows, extract the new lines, and tag each event using the host interceptor
### Agent a9: names of its sink, channel and source
a9.sinks = k9
a9.channels = c9
a9.sources = r9


# Source r9: exec source that streams the lines appended to the Apache access log
a9.sources.r9.type = exec
# tail -F (not -f) keeps following the path by name and reopens it, so the stream
# survives rotation of access_log instead of staying attached to the rotated-away file
a9.sources.r9.command = tail -F /var/log/httpd/access_log
a9.sources.r9.shell = /bin/bash -c
a9.sources.r9.interceptors = i1
# Host interceptor i1: writes this agent's host into the header of every event
a9.sources.r9.interceptors.i1.type = host
# useIP = false -> store the hostname rather than the IP address
a9.sources.r9.interceptors.i1.useIP = false
# Header key that receives the value, i.e. the event header becomes agentHost -> <hostname>
a9.sources.r9.interceptors.i1.hostHeader = agentHost


#------------------static interceptor-----------------------------------
# Sample only (commented out). A static interceptor adds one fixed key/value pair to the event header;
# the sink stage can then pick up static_value, e.g. for the generated file name or the archive directory.
# NOTE(review): the agent_lxw1234 / sources1 prefix does not match the a9 / r9 names used by the live
# configuration in this file - rename the prefix before uncommenting.
#agent_lxw1234.sources.sources1.interceptors = i1
#agent_lxw1234.sources.sources1.interceptors.i1.type = static
#agent_lxw1234.sources.sources1.interceptors.i1.preserveExisting = true
#agent_lxw1234.sources.sources1.interceptors.i1.key = static_key
#agent_lxw1234.sources.sources1.interceptors.i1.value = static_value
#-----------------------------------------------------------------------


#------------------regex interceptor-----------------------------------
# Sample only (commented out). The keys use the a9 / r9 names declared in this file and the
# interceptor is called i2 so that it does not overwrite the host interceptor i1.
# To enable it, uncomment the four lines below; the interceptors list must then name both, "i1 i2".
#
# regex_filter tests every event body against the regex. ^[0-9]*$ matches a body that consists
# only of digits (an empty body matches as well) - not merely a body that starts with a digit.
# excludeEvents = true drops the matching events and lets everything else through.
#a9.sources.r9.interceptors = i1 i2
#a9.sources.r9.interceptors.i2.type = regex_filter
#a9.sources.r9.interceptors.i2.regex = ^[0-9]*$
#a9.sources.r9.interceptors.i2.excludeEvents = true
#-----------------------------------------------------------------------


# Channel c9: file channel, with its checkpoint and data directories on the local disk
a9.channels.c9.type = file
a9.channels.c9.dataDirs = /opt/module/cdh/flume-1.6.0-cdh5.10.0/flume_file/data
a9.channels.c9.checkpointDir = /opt/module/cdh/flume-1.6.0-cdh5.10.0/flume_file/checkpoint
# Sizing options, currently disabled:
#a9.channels.c9.capacity = 100
#a9.channels.c9.transactionCapacity = 10


# Sink k9: write the events to HDFS
a9.sinks.k9.type = hdfs
# Resolve the time escapes in hdfs.path from the agent's local clock; source r9 configures
# no timestamp interceptor, so the events carry no timestamp header that could be used instead
a9.sinks.k9.hdfs.useLocalTimeStamp = true
# Archive layout: one directory per day, one sub-directory per minute
a9.sinks.k9.hdfs.path = hdfs://make.hadoop.com:8020/user/make/flume/apache_log/%y-%m-%d/%H-%M
# File names start with the value of the agentHost event header (set by the host interceptor)
a9.sinks.k9.hdfs.filePrefix = %{agentHost}
# Flume does not insert the period itself, so it has to be part of the suffix
a9.sinks.k9.hdfs.fileSuffix = .log
# Roll the in-use file (close it and start a new one) on whichever limit is reached first:
# - after 60 seconds
a9.sinks.k9.hdfs.rollInterval = 60
# - after 3036 bytes
a9.sinks.k9.hdfs.rollSize = 3036
# - after 10 events
a9.sinks.k9.hdfs.rollCount = 10
# Round the timestamp down before it is substituted into hdfs.path
a9.sinks.k9.hdfs.round = true
# Rounding step; with roundUnit below: everything within the same 1 minute lands in one directory
a9.sinks.k9.hdfs.roundValue = 1
# Unit of the rounding step
a9.sinks.k9.hdfs.roundUnit = minute
# DataStream: plain output files (no SequenceFile container, no compression)
a9.sinks.k9.hdfs.fileType = DataStream
# The documented values are "Text" and "Writable" (capitalised)
a9.sinks.k9.hdfs.writeFormat = Text
# Number of events written to the file before it is flushed to HDFS
a9.sinks.k9.hdfs.batchSize = 10


# Wiring: sink k9 reads from channel c9, source r9 writes into it
a9.sinks.k9.channel = c9
a9.sources.r9.channels = c9

以上是 host、static、regex 三种拦截器最简单的用法,经实测可行;至于在实际生产中应该如何使用,目前还不清楚,先记录下来。

关于使用正则表达式清洗日志数据,请参考:https://blog.csdn.net/maketubu7/article/details/80739375

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值