flume配置-生产环境下从文件目录下将日志上传到s3

本文介绍了如何在生产环境中使用Flume从文件目录高效地收集日志,并通过多个spoolDir source和hdfs sink提升数据传输速率,实现日志批量上传到S3存储。
摘要由CSDN通过智能技术生成

生产环境下将收集到的日志上传至 S3,采用多个 spoolDir source 和多个 hdfs sink 的方式,是为了提高读取数据和上传数据的吞吐量。


# Flume agent "clog": two spooling-directory sources feed one shared memory
# channel, which is drained in parallel by six HDFS(S3) sinks to raise
# upload throughput (each sink writes its own file, suffixed .1 … .6).
clog.sources = source_log1 source_log2 
clog.channels = channel_log
clog.sinks = sink_log1 sink_log2 sink_log3 sink_log4 sink_log5 sink_log6 

# Spooling-directory source: ingests completed log files dropped into /home/data/log1.
clog.sources.source_log1.type = spooldir
clog.sources.source_log1.spoolDir = /home/data/log1
# Delete each file as soon as it is fully ingested (instead of renaming to *.COMPLETED).
# NOTE(review): combined with the memory channel, an agent crash loses events that
# were already deleted from disk but not yet delivered to a sink.
clog.sources.source_log1.deletePolicy = immediate
clog.sources.source_log1.batchSize = 1000
# Allow very long log lines; lines beyond this limit would otherwise be truncated.
clog.sources.source_log1.deserializer.maxLineLength = 999999
# Put the file's basename into the event header so the interceptor can parse it.
clog.sources.source_log1.basenameHeader = true
# Skip files whose names do not start with a digit (names are expected to begin
# with a yyyyMMddHHmm timestamp -- see the interceptor regex below).
clog.sources.source_log1.ignorePattern = ^[^0-9].*
# Silently drop undecodable characters instead of failing the file.
clog.sources.source_log1.decodeErrorPolicy = IGNORE
# Custom interceptor (not part of stock Flume): extracts fields from the file name
#   <yyyyMMdd><HH><mm>-<project>-<machine>-<region>-<module>-<service>.log
# into event headers consumed by the sinks' hdfs.path / filePrefix templates.
clog.sources.source_log1.interceptors = i1
clog.sources.source_log1.interceptors.i1.type = org.apache.flume.interceptor.RegexExtractorHeaderInterceptor$Builder
clog.sources.source_log1.interceptors.i1.regex = (\\d{8})(\\d{2})(\\d{2})-(.*)-(.*)-(.*)-(.*)-(.*)\\.log
clog.sources.source_log1.interceptors.i1.serializers=s1 s2 s3 s4 s5 s6 s7 s8
clog.sources.source_log1.interceptors.i1.serializers.s1.name=day
clog.sources.source_log1.interceptors.i1.serializers.s2.name=hour
clog.sources.source_log1.interceptors.i1.serializers.s3.name=minute
clog.sources.source_log1.interceptors.i1.serializers.s4.name=project
clog.sources.source_log1.interceptors.i1.serializers.s5.name=machine
clog.sources.source_log1.interceptors.i1.serializers.s6.name=region
clog.sources.source_log1.interceptors.i1.serializers.s7.name=module
clog.sources.source_log1.interceptors.i1.serializers.s8.name=service
clog.sources.source_log1.channels = channel_log

# Second spooling-directory source, identical to source_log1 but watching
# /home/data/log2; running two sources doubles file-read throughput.
clog.sources.source_log2.type = spooldir
clog.sources.source_log2.spoolDir = /home/data/log2
# Delete files immediately after ingestion -- see the data-loss caveat: events
# held only in the memory channel are lost if the agent dies.
clog.sources.source_log2.deletePolicy = immediate
clog.sources.source_log2.batchSize = 1000
# Allow very long log lines; longer lines would otherwise be truncated.
clog.sources.source_log2.deserializer.maxLineLength = 999999
# Expose the file's basename as an event header for the interceptor.
clog.sources.source_log2.basenameHeader = true
# Skip files not starting with a digit (timestamp-prefixed names expected).
clog.sources.source_log2.ignorePattern = ^[^0-9].*
# Silently drop undecodable characters instead of failing the file.
clog.sources.source_log2.decodeErrorPolicy = IGNORE
# Custom interceptor: parses the file name
#   <yyyyMMdd><HH><mm>-<project>-<machine>-<region>-<module>-<service>.log
# into the headers used by the sinks' path templates.
clog.sources.source_log2.interceptors = i1
clog.sources.source_log2.interceptors.i1.type = org.apache.flume.interceptor.RegexExtractorHeaderInterceptor$Builder
clog.sources.source_log2.interceptors.i1.regex = (\\d{8})(\\d{2})(\\d{2})-(.*)-(.*)-(.*)-(.*)-(.*)\\.log
clog.sources.source_log2.interceptors.i1.serializers=s1 s2 s3 s4 s5 s6 s7 s8
clog.sources.source_log2.interceptors.i1.serializers.s1.name=day
clog.sources.source_log2.interceptors.i1.serializers.s2.name=hour
clog.sources.source_log2.interceptors.i1.serializers.s3.name=minute
clog.sources.source_log2.interceptors.i1.serializers.s4.name=project
clog.sources.source_log2.interceptors.i1.serializers.s5.name=machine
clog.sources.source_log2.interceptors.i1.serializers.s6.name=region
clog.sources.source_log2.interceptors.i1.serializers.s7.name=module
clog.sources.source_log2.interceptors.i1.serializers.s8.name=service
clog.sources.source_log2.channels = channel_log



# HDFS sink #1: writes LZO-compressed text streams to S3 via Hadoop's s3n filesystem.
# FIX(review): the original URI "s3n://aws_access_key_idaws_access_key_id:aws_secret_access_key/..."
# duplicated the access-key placeholder and omitted the mandatory "@bucket" authority;
# the valid s3n form is s3n://ACCESS_KEY:SECRET_KEY@bucket/path. Prefer setting
# fs.s3n.awsAccessKeyId / fs.s3n.awsSecretAccessKey in core-site.xml instead of
# embedding credentials in the URI (URIs leak into logs and break on '/' in the secret).
clog.sinks.sink_log1.type = hdfs
clog.sinks.sink_log1.hdfs.path = s3n://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@your-bucket/%{service}/%{day}/%{hour}
# Output file: <minute>.<counter>.1.lzo -- the ".1" keeps this sink's files
# distinct from the other five sinks draining the same channel.
clog.sinks.sink_log1.hdfs.filePrefix = %{minute}
clog.sinks.sink_log1.hdfs.fileSuffix = .1.lzo
# Disable size/count/interval rolling; files close only after 180 s of inactivity.
clog.sinks.sink_log1.hdfs.rollSize = 0
clog.sinks.sink_log1.hdfs.rollCount = 0
clog.sinks.sink_log1.hdfs.rollInterval = 0
clog.sinks.sink_log1.hdfs.idleTimeout = 180
# 10-minute call timeout to tolerate slow S3 writes.
clog.sinks.sink_log1.hdfs.callTimeout = 600000
clog.sinks.sink_log1.hdfs.batchSize = 1000
# LZO compressed stream; requires the hadoop-lzo native library on the Flume host.
clog.sinks.sink_log1.hdfs.codeC = lzop
clog.sinks.sink_log1.hdfs.fileType = CompressedStream
clog.sinks.sink_log1.hdfs.writeFormat = Text
clog.sinks.sink_log1.channel = channel_log

# HDFS sink #2 (parallel S3 writer, output suffixed .2.lzo).
# FIX(review): repaired the s3n URI -- the original duplicated the access-key
# placeholder and lacked the "@bucket" part; valid form is
# s3n://ACCESS_KEY:SECRET_KEY@bucket/path (better: fs.s3n.* keys in core-site.xml).
clog.sinks.sink_log2.type = hdfs
clog.sinks.sink_log2.hdfs.path = s3n://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@your-bucket/%{service}/%{day}/%{hour}
clog.sinks.sink_log2.hdfs.filePrefix = %{minute}
clog.sinks.sink_log2.hdfs.fileSuffix = .2.lzo
# No size/count/interval rolling; files close after 180 s idle.
clog.sinks.sink_log2.hdfs.rollSize = 0
clog.sinks.sink_log2.hdfs.rollCount = 0
clog.sinks.sink_log2.hdfs.rollInterval = 0
clog.sinks.sink_log2.hdfs.idleTimeout = 180
clog.sinks.sink_log2.hdfs.callTimeout = 600000
clog.sinks.sink_log2.hdfs.batchSize = 1000
# LZO compression requires hadoop-lzo native libraries.
clog.sinks.sink_log2.hdfs.codeC = lzop
clog.sinks.sink_log2.hdfs.fileType = CompressedStream
clog.sinks.sink_log2.hdfs.writeFormat = Text
clog.sinks.sink_log2.channel = channel_log

# HDFS sink #3 (parallel S3 writer, output suffixed .3.lzo).
# FIX(review): repaired the s3n URI -- the original duplicated the access-key
# placeholder and lacked the "@bucket" part; valid form is
# s3n://ACCESS_KEY:SECRET_KEY@bucket/path (better: fs.s3n.* keys in core-site.xml).
clog.sinks.sink_log3.type = hdfs
clog.sinks.sink_log3.hdfs.path = s3n://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@your-bucket/%{service}/%{day}/%{hour}
clog.sinks.sink_log3.hdfs.filePrefix = %{minute}
clog.sinks.sink_log3.hdfs.fileSuffix = .3.lzo
# No size/count/interval rolling; files close after 180 s idle.
clog.sinks.sink_log3.hdfs.rollSize = 0
clog.sinks.sink_log3.hdfs.rollCount = 0
clog.sinks.sink_log3.hdfs.rollInterval = 0
clog.sinks.sink_log3.hdfs.idleTimeout = 180
clog.sinks.sink_log3.hdfs.callTimeout = 600000
clog.sinks.sink_log3.hdfs.batchSize = 1000
# LZO compression requires hadoop-lzo native libraries.
clog.sinks.sink_log3.hdfs.codeC = lzop
clog.sinks.sink_log3.hdfs.fileType = CompressedStream
clog.sinks.sink_log3.hdfs.writeFormat = Text
clog.sinks.sink_log3.channel = channel_log

# HDFS sink #4 (parallel S3 writer, output suffixed .4.lzo).
# FIX(review): repaired the s3n URI -- the original duplicated the access-key
# placeholder and lacked the "@bucket" part; valid form is
# s3n://ACCESS_KEY:SECRET_KEY@bucket/path (better: fs.s3n.* keys in core-site.xml).
clog.sinks.sink_log4.type = hdfs
clog.sinks.sink_log4.hdfs.path = s3n://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@your-bucket/%{service}/%{day}/%{hour}
clog.sinks.sink_log4.hdfs.filePrefix = %{minute}
clog.sinks.sink_log4.hdfs.fileSuffix = .4.lzo
# No size/count/interval rolling; files close after 180 s idle.
clog.sinks.sink_log4.hdfs.rollSize = 0
clog.sinks.sink_log4.hdfs.rollCount = 0
clog.sinks.sink_log4.hdfs.rollInterval = 0
clog.sinks.sink_log4.hdfs.idleTimeout = 180
clog.sinks.sink_log4.hdfs.callTimeout = 600000
clog.sinks.sink_log4.hdfs.batchSize = 1000
# LZO compression requires hadoop-lzo native libraries.
clog.sinks.sink_log4.hdfs.codeC = lzop
clog.sinks.sink_log4.hdfs.fileType = CompressedStream
clog.sinks.sink_log4.hdfs.writeFormat = Text
clog.sinks.sink_log4.channel = channel_log

# HDFS sink #5 (parallel S3 writer, output suffixed .5.lzo).
# FIX(review): repaired the s3n URI -- the original duplicated the access-key
# placeholder and lacked the "@bucket" part; valid form is
# s3n://ACCESS_KEY:SECRET_KEY@bucket/path (better: fs.s3n.* keys in core-site.xml).
clog.sinks.sink_log5.type = hdfs
clog.sinks.sink_log5.hdfs.path = s3n://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@your-bucket/%{service}/%{day}/%{hour}
clog.sinks.sink_log5.hdfs.filePrefix = %{minute}
clog.sinks.sink_log5.hdfs.fileSuffix = .5.lzo
# No size/count/interval rolling; files close after 180 s idle.
clog.sinks.sink_log5.hdfs.rollSize = 0
clog.sinks.sink_log5.hdfs.rollCount = 0
clog.sinks.sink_log5.hdfs.rollInterval = 0
clog.sinks.sink_log5.hdfs.idleTimeout = 180
clog.sinks.sink_log5.hdfs.callTimeout = 600000
clog.sinks.sink_log5.hdfs.batchSize = 1000
# LZO compression requires hadoop-lzo native libraries.
clog.sinks.sink_log5.hdfs.codeC = lzop
clog.sinks.sink_log5.hdfs.fileType = CompressedStream
clog.sinks.sink_log5.hdfs.writeFormat = Text
clog.sinks.sink_log5.channel = channel_log

# HDFS sink #6 (parallel S3 writer, output suffixed .6.lzo).
# FIX(review): repaired the s3n URI -- the original duplicated the access-key
# placeholder and lacked the "@bucket" part; valid form is
# s3n://ACCESS_KEY:SECRET_KEY@bucket/path (better: fs.s3n.* keys in core-site.xml).
clog.sinks.sink_log6.type = hdfs
clog.sinks.sink_log6.hdfs.path = s3n://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@your-bucket/%{service}/%{day}/%{hour}
clog.sinks.sink_log6.hdfs.filePrefix = %{minute}
clog.sinks.sink_log6.hdfs.fileSuffix = .6.lzo
# No size/count/interval rolling; files close after 180 s idle.
clog.sinks.sink_log6.hdfs.rollSize = 0
clog.sinks.sink_log6.hdfs.rollCount = 0
clog.sinks.sink_log6.hdfs.rollInterval = 0
clog.sinks.sink_log6.hdfs.idleTimeout = 180
clog.sinks.sink_log6.hdfs.callTimeout = 600000
clog.sinks.sink_log6.hdfs.batchSize = 1000
# LZO compression requires hadoop-lzo native libraries.
clog.sinks.sink_log6.hdfs.codeC = lzop
clog.sinks.sink_log6.hdfs.fileType = CompressedStream
clog.sinks.sink_log6.hdfs.writeFormat = Text
clog.sinks.sink_log6.channel = channel_log


# Memory channel shared by both sources and all six sinks: fast, but events
# buffered here are lost if the agent dies. Because the sources use
# deletePolicy=immediate, such a loss is unrecoverable; use a file channel
# if durability matters more than throughput.
clog.channels.channel_log.type = memory
clog.channels.channel_log.capacity = 100000
# Per-transaction cap; must be >= the sources'/sinks' batchSize (1000).
clog.channels.channel_log.transactionCapacity = 10000

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值