(3) Flume single-node write-to-HDFS exercise, plus a custom interceptor for log formatting

(1) Reference: http://my.oschina.net/leejun2005/blog/288136#OSC_h2_10

(2) The HDFS sink needs the relevant Hadoop jars on Flume's classpath; the CDH build of Flume already bundles them.

(3)flume_directHDFS2.conf 

# First, name the components that agent1 should activate; each one is defined below.
agent1.sources = exec-source1
agent1.channels = ch1
agent1.sinks = log-sink1



##define -- Exec Source
#type       The component type name, needs to be exec  (required)
#shell      A shell invocation used to run the command 
#command    The command to execute  (required)
#channels   (required)

agent1.sources.exec-source1.type = exec
agent1.sources.exec-source1.shell = /bin/bash -c
agent1.sources.exec-source1.command = tail -n +0 -F /usr/local/nginx/logs/vdnlog_access.log
agent1.sources.exec-source1.channels = ch1


##define -- Memory Channel called ch1 on agent1
#type			The component type name, needs to be memory (required)
#capacity		The maximum number of events stored in the channel
#transactionCapacity	The maximum number of events the channel will take from a source or give to a sink per transaction
#keep-alive		Timeout in seconds for adding or removing an event
agent1.channels.ch1.type = memory
agent1.channels.ch1.capacity = 100000
agent1.channels.ch1.transactionCapacity = 100000
agent1.channels.ch1.keep-alive = 30
 
# Define -- Hdfs Sink
#type			The component type name, needs to be hdfs  (required)
#channel		(required)
#hdfs.path		HDFS directory path (eg hdfs://namenode/flume/webdata/) (required)
#hdfs.writeFormat       Format for sequence file records. One of “Text” or “Writable” (the default).
#hdfs.fileType		File format: currently SequenceFile, DataStream or CompressedStream (1)DataStream will not compress output file and please don’t set codeC (2)CompressedStream requires set hdfs.codeC with an available codeC
#hdfs.filePrefix	Name prefixed to files created by Flume in hdfs directory
#hdfs.fileSuffix	Suffix to append to file (eg .avro - NOTE: period is not automatically added)
#hdfs.round		Should the timestamp be rounded down
#hdfs.roundValue	Rounded down to the highest multiple of this (in the unit configured using hdfs.roundUnit), less than current time.
#
# To roll purely by time (every 10 minutes here), these three parameters must be set to 0, otherwise time-based rolling does not take effect:
#agent1.sinks.log-sink1.hdfs.rollInterval= 0
#agent1.sinks.log-sink1.hdfs.rollSize = 0
#agent1.sinks.log-sink1.hdfs.rollCount = 0
#
# Files still being written carry a .tmp suffix.
#idleTimeout=5  Timeout after which inactive files get closed
###################
agent1.sinks.log-sink1.type = hdfs
agent1.sinks.log-sink1.channel = ch1
agent1.sinks.log-sink1.hdfs.path = hdfs://101.240.151.41:9000/test/pjm/%y-%m-%d
agent1.sinks.log-sink1.hdfs.writeFormat = Text
agent1.sinks.log-sink1.hdfs.fileType = DataStream
agent1.sinks.log-sink1.hdfs.filePrefix = flume_%y-%m-%d_%H%M%S
agent1.sinks.log-sink1.hdfs.fileSuffix = .log
agent1.sinks.log-sink1.hdfs.round = true
agent1.sinks.log-sink1.hdfs.roundValue = 10
agent1.sinks.log-sink1.hdfs.roundUnit = minute
agent1.sinks.log-sink1.hdfs.rollInterval= 0
agent1.sinks.log-sink1.hdfs.rollSize = 0
agent1.sinks.log-sink1.hdfs.rollCount = 0
agent1.sinks.log-sink1.hdfs.useLocalTimeStamp = true
agent1.sinks.log-sink1.hdfs.callTimeout = 20000
agent1.sinks.log-sink1.hdfs.idleTimeout=5

(Files here are rolled by time: one file every 10 minutes.)

 bin/flume-ng agent --conf conf --conf-file ./conf/flume_directHDFS2.conf --name agent1 -Dflume.root.logger=INFO,console

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

// Custom interceptor (flume_directHDFS3.properties): route events to different channels and HDFS paths by log type

# First, name the components that this agent (named "agent") should activate; each one is defined below.
agent.sources = exec-source1
agent.channels = memchannellv memchannelerr memchannelbf memchannelfs memchannelother
agent.sinks = hdfssinklv hdfssinkerr hdfssinkbf hdfssinkfs hdfssinkother



##define -- Exec Source
#type       The component type name, needs to be exec  (required)
#shell      A shell invocation used to run the command 
#command    The command to execute  (required)
#channels   (required)

agent.sources.exec-source1.type = exec
agent.sources.exec-source1.shell = /bin/bash -c
agent.sources.exec-source1.command = tail -F /usr/local/nginx/logs/vdnlog_access.log
agent.sources.exec-source1.interceptors = timestamp nginxlogformat
agent.sources.exec-source1.interceptors.nginxlogformat.type = com.cntv.bigdata.flume.interceptor.NginxInterceptor$Builder
agent.sources.exec-source1.interceptors.timestamp.type = timestamp


##sources selector
agent.sources.exec-source1.selector.type = multiplexing
agent.sources.exec-source1.selector.header = type
agent.sources.exec-source1.selector.mapping.lv = memchannellv
agent.sources.exec-source1.selector.mapping.err = memchannelerr
agent.sources.exec-source1.selector.mapping.bf = memchannelbf
agent.sources.exec-source1.selector.mapping.fs = memchannelfs
agent.sources.exec-source1.selector.default = memchannelother
agent.sources.exec-source1.channels = memchannellv memchannelerr memchannelbf memchannelfs memchannelother



##define -- Memory Channels (one per log type)
#type			The component type name, needs to be memory (required)
#capacity		The maximum number of events stored in the channel
#transactionCapacity	The maximum number of events the channel will take from a source or give to a sink per transaction
#keep-alive		Timeout in seconds for adding or removing an event


 
agent.channels.memchannellv.type = memory
agent.channels.memchannellv.capacity = 10000
agent.channels.memchannellv.transactionCapacity = 10000
agent.channels.memchannellv.keep-alive = 3

agent.channels.memchannelerr.type = memory
agent.channels.memchannelerr.capacity = 10000
agent.channels.memchannelerr.transactionCapacity = 10000
agent.channels.memchannelerr.keep-alive = 3


agent.channels.memchannelbf.type = memory
agent.channels.memchannelbf.capacity = 10000
agent.channels.memchannelbf.transactionCapacity = 10000
agent.channels.memchannelbf.keep-alive = 3

agent.channels.memchannelfs.type = memory
agent.channels.memchannelfs.capacity = 10000
agent.channels.memchannelfs.transactionCapacity = 10000
agent.channels.memchannelfs.keep-alive = 3


agent.channels.memchannelother.type = memory
agent.channels.memchannelother.capacity = 10000
agent.channels.memchannelother.transactionCapacity = 10000
agent.channels.memchannelother.keep-alive = 3



# Define -- Hdfs Sink
#type			The component type name, needs to be hdfs  (required)
#channel		(required)
#hdfs.path		HDFS directory path (eg hdfs://namenode/flume/webdata/) (required)
#hdfs.writeFormat       Format for sequence file records. One of “Text” or “Writable” (the default).
#hdfs.fileType		File format: currently SequenceFile, DataStream or CompressedStream (1)DataStream will not compress output file and please don’t set codeC (2)CompressedStream requires set hdfs.codeC with an available codeC
#hdfs.filePrefix	Name prefixed to files created by Flume in hdfs directory
#hdfs.fileSuffix	Suffix to append to file (eg .avro - NOTE: period is not automatically added)
#hdfs.round		Should the timestamp be rounded down
#hdfs.roundValue	Rounded down to the highest multiple of this (in the unit configured using hdfs.roundUnit), less than current time.
#
# To roll purely by time (every 10 minutes here), these three parameters must be set to 0, otherwise time-based rolling does not take effect:
#agent1.sinks.log-sink1.hdfs.rollInterval= 0
#agent1.sinks.log-sink1.hdfs.rollSize = 0
#agent1.sinks.log-sink1.hdfs.rollCount = 0
#
#
####################

#######lv
agent.sinks.hdfssinklv.type = hdfs
agent.sinks.hdfssinklv.hdfs.fileType = DataStream
agent.sinks.hdfssinklv.hdfs.idleTimeout = 60
agent.sinks.hdfssinklv.hdfs.round = true
agent.sinks.hdfssinklv.hdfs.roundValue = 10
agent.sinks.hdfssinklv.hdfs.roundUnit = minute
agent.sinks.hdfssinklv.hdfs.rollInterval = 0
agent.sinks.hdfssinklv.hdfs.rollSize = 0
agent.sinks.hdfssinklv.hdfs.rollCount = 0
agent.sinks.hdfssinklv.hdfs.path = hdfs://101.240.151.41:9000/test/pjm/xxoo/lv/%y-%m-%d
agent.sinks.hdfssinklv.hdfs.filePrefix = flume_bjxd02Lv_%y-%m-%d_%H%M%S
agent.sinks.hdfssinklv.hdfs.fileSuffix = .log
agent.sinks.hdfssinklv.channel = memchannellv

#######err
agent.sinks.hdfssinkerr.type = hdfs
agent.sinks.hdfssinkerr.hdfs.fileType = DataStream
agent.sinks.hdfssinkerr.hdfs.idleTimeout = 60
agent.sinks.hdfssinkerr.hdfs.round = true
agent.sinks.hdfssinkerr.hdfs.roundValue = 10
agent.sinks.hdfssinkerr.hdfs.roundUnit = minute
agent.sinks.hdfssinkerr.hdfs.rollInterval = 0
agent.sinks.hdfssinkerr.hdfs.rollSize = 0
agent.sinks.hdfssinkerr.hdfs.rollCount = 0
agent.sinks.hdfssinkerr.hdfs.path = hdfs://101.240.151.41:9000/test/pjm/xxoo/err/%y-%m-%d
agent.sinks.hdfssinkerr.hdfs.filePrefix = flume_bjxd02Err_%y-%m-%d_%H%M%S
agent.sinks.hdfssinkerr.hdfs.fileSuffix = .log
agent.sinks.hdfssinkerr.channel = memchannelerr

#######bf
agent.sinks.hdfssinkbf.type = hdfs
agent.sinks.hdfssinkbf.hdfs.fileType = DataStream
agent.sinks.hdfssinkbf.hdfs.idleTimeout = 60
agent.sinks.hdfssinkbf.hdfs.round = true
agent.sinks.hdfssinkbf.hdfs.roundValue = 10
agent.sinks.hdfssinkbf.hdfs.roundUnit = minute
agent.sinks.hdfssinkbf.hdfs.rollInterval = 0
agent.sinks.hdfssinkbf.hdfs.rollSize = 0
agent.sinks.hdfssinkbf.hdfs.rollCount = 0
agent.sinks.hdfssinkbf.hdfs.path = hdfs://101.240.151.41:9000/test/pjm/xxoo/bf/%y-%m-%d
agent.sinks.hdfssinkbf.hdfs.filePrefix = flume_bjxd02Bf_%y-%m-%d_%H%M%S
agent.sinks.hdfssinkbf.hdfs.fileSuffix = .log
agent.sinks.hdfssinkbf.channel = memchannelbf


#######fs
agent.sinks.hdfssinkfs.type = hdfs
agent.sinks.hdfssinkfs.hdfs.fileType = DataStream
agent.sinks.hdfssinkfs.hdfs.idleTimeout = 60
agent.sinks.hdfssinkfs.hdfs.round = true
agent.sinks.hdfssinkfs.hdfs.roundValue = 10
agent.sinks.hdfssinkfs.hdfs.roundUnit = minute
agent.sinks.hdfssinkfs.hdfs.rollInterval = 0
agent.sinks.hdfssinkfs.hdfs.rollSize = 0
agent.sinks.hdfssinkfs.hdfs.rollCount = 0
agent.sinks.hdfssinkfs.hdfs.path = hdfs://101.240.151.41:9000/test/pjm/xxoo/fs/%y-%m-%d
agent.sinks.hdfssinkfs.hdfs.filePrefix = flume_bjxd02Fs_%y-%m-%d_%H%M%S
agent.sinks.hdfssinkfs.hdfs.fileSuffix = .log
agent.sinks.hdfssinkfs.channel = memchannelfs

#######other
agent.sinks.hdfssinkother.type = hdfs
agent.sinks.hdfssinkother.hdfs.fileType = DataStream
agent.sinks.hdfssinkother.hdfs.idleTimeout = 60
agent.sinks.hdfssinkother.hdfs.round = true
agent.sinks.hdfssinkother.hdfs.roundValue = 10
agent.sinks.hdfssinkother.hdfs.roundUnit = minute
agent.sinks.hdfssinkother.hdfs.rollInterval = 0
agent.sinks.hdfssinkother.hdfs.rollSize = 0
agent.sinks.hdfssinkother.hdfs.rollCount = 0
agent.sinks.hdfssinkother.hdfs.path = hdfs://101.240.151.41:9000/test/pjm/xxoo/other/%y-%m-%d
agent.sinks.hdfssinkother.hdfs.filePrefix = flume_bjxd02Other_%y-%m-%d_%H%M%S
agent.sinks.hdfssinkother.hdfs.fileSuffix = .log
agent.sinks.hdfssinkother.channel = memchannelother

[xxxx@localhost flume]$ ./bin/flume-ng agent --conf conf --conf-file ./conf/flume_directHDFS3.properties --name agent -Dflume.root.logger=DEBUG,console,LOGFILE
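The interceptor class com.cntv.bigdata.flume.interceptor.NginxInterceptor referenced above is not listed in this post. Below is a minimal Java sketch of what it could look like, assuming the event type (lv / err / bf / fs) can be decided from the content of each nginx log line; the real field layout and classification rules of vdnlog_access.log are not shown here, so the classify() logic is purely illustrative. The Flume-facing parts follow the standard interceptor contract: implement org.apache.flume.interceptor.Interceptor, stamp a "type" header on every event (matching selector.header = type), and expose a static Builder, which is what the ...interceptors.nginxlogformat.type = ...$Builder line instantiates.

package com.cntv.bigdata.flume.interceptor;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;

// Sketch only: sets a "type" header per event so the multiplexing selector can
// route it; the classification rule below is an assumption, not the original code.
public class NginxInterceptor implements Interceptor {

    @Override
    public void initialize() {
        // no state to set up in this sketch
    }

    @Override
    public Event intercept(Event event) {
        String line = new String(event.getBody(), StandardCharsets.UTF_8);
        Map<String, String> headers = event.getHeaders();
        headers.put("type", classify(line));  // header key must match selector.header
        return event;
    }

    @Override
    public List<Event> intercept(List<Event> events) {
        List<Event> out = new ArrayList<Event>(events.size());
        for (Event e : events) {
            out.add(intercept(e));
        }
        return out;
    }

    @Override
    public void close() {
        // nothing to release
    }

    // Assumed rule for illustration: derive the type from a marker in the request line.
    private String classify(String line) {
        if (line.contains("/lv"))  return "lv";
        if (line.contains("/err")) return "err";
        if (line.contains("/bf"))  return "bf";
        if (line.contains("/fs"))  return "fs";
        return "other";  // no mapping for "other", so the selector uses its default channel
    }

    // The $Builder referenced in the agent configuration.
    public static class Builder implements Interceptor.Builder {
        @Override
        public void configure(Context context) {
            // no custom properties needed in this sketch
        }

        @Override
        public Interceptor build() {
            return new NginxInterceptor();
        }
    }
}

Package the class into a jar and put it on the agent's classpath (for example Flume's lib/ directory or a plugins.d entry) before starting the agent. Events whose "type" header matches no selector.mapping.* entry fall through to memchannelother via selector.default.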




