flume从kafka获取数据并按时间保存到hdfs上

############################# flume-config-name: read data from Kafka and save it to HDFS ###########################
# NOTE(review): the "flume-config-name" prefix on every key is presumably the Flume agent name and
# would have to match the name given when the agent is started - confirm against the launch command.
# Name of the source
flume-config-name.sources = source-flume-config
# Name of the channel
flume-config-name.channels = channel-flume-config
# Name of the sink
flume-config-name.sinks = sink-flume-config


# Source "source-flume-config": source configuration
# Kafka settings
flume-config-name.sources.source-flume-config.type = org.apache.flume.source.kafka.KafkaSource
# ZooKeeper address; separate multiple addresses with commas
# NOTE(review): zookeeperConnect / topic / groupId look like the older (Flume 1.6-era) Kafka source
# property names; later releases appear to use kafka.bootstrap.servers / kafka.topics /
# kafka.consumer.group.id instead - confirm against the deployed Flume version.
flume-config-name.sources.source-flume-config.zookeeperConnect = localhost:2181
# Topic name and consumer group id
flume-config-name.sources.source-flume-config.topic = flume-topic
flume-config-name.sources.source-flume-config.groupId = flumev01
# Tuning parameters follow
# presumably batchSize is the maximum number of messages per batch and batchDurationMillis the
# maximum wait (ms) before a partial batch is handed to the channel - verify in the Flume user guide
flume-config-name.sources.source-flume-config.batchSize = 20000
flume-config-name.sources.source-flume-config.batchDurationMillis = 1000
flume-config-name.sources.source-flume-config.backoffSleepIncrement = 1000
flume-config-name.sources.source-flume-config.maxBackoffSleep = 5000
# presumably the "kafka." prefix forwards consumer.timeout.ms to the underlying Kafka consumer - TODO confirm
flume-config-name.sources.source-flume-config.kafka.consumer.timeout.ms = 100

# Sink "sink-flume-config"
# Sink type is hdfs
flume-config-name.sinks.sink-flume-config.type = hdfs
# Output location; the flume user needs read/write permission on it (e.g. chown flume:hdfs /test/test).
# To store data by time, configure the path as below: the %Y-%m-%d/%H escapes give one
# directory per day with one sub-directory per hour.
flume-config-name.sinks.sink-flume-config.hdfs.path = /test/test/%Y-%m-%d/%H
# File name prefix
flume-config-name.sinks.sink-flume-config.hdfs.filePrefix = test
# While a file is still in use (being written), its suffix is .tmp.gz
flume-config-name.sinks.sink-flume-config.hdfs.inUseSuffix = .tmp.gz
# Suffix of a finished file: .log.gz
flume-config-name.sinks.sink-flume-config.hdfs.fileSuffix = .log.gz
# Related settings follow
# presumably makes the path escapes above resolve from the agent's local clock rather than an
# event timestamp header - verify in the Flume user guide
flume-config-name.sinks.sink-flume-config.hdfs.useLocalTimeStamp = true
# NOTE(review): rollInterval and rollCount are both 0, which presumably disables time-based and
# event-count-based rolling, leaving rollSize (1073741824 = 2^30, presumably bytes) and
# idleTimeout as the only triggers that close a file - confirm this is intended.
flume-config-name.sinks.sink-flume-config.hdfs.rollInterval = 0
flume-config-name.sinks.sink-flume-config.hdfs.rollSize = 1073741824
flume-config-name.sinks.sink-flume-config.hdfs.rollCount = 0
flume-config-name.sinks.sink-flume-config.hdfs.batchSize = 20000
# Output is written as a gzip-compressed stream
flume-config-name.sinks.sink-flume-config.hdfs.fileType = CompressedStream
flume-config-name.sinks.sink-flume-config.hdfs.codeC = gzip
# presumably seconds of inactivity before an idle file is closed - TODO confirm unit
flume-config-name.sinks.sink-flume-config.hdfs.idleTimeout = 300
flume-config-name.sinks.sink-flume-config.hdfs.threadsPoolSize = 10
# presumably milliseconds allowed for each HDFS operation - TODO confirm unit
flume-config-name.sinks.sink-flume-config.hdfs.callTimeout = 60000

# Channel "channel-flume-config"
# Channel type
flume-config-name.channels.channel-flume-config.type = memory
# The two parameters below can be tuned to the actual workload
# NOTE(review): a memory channel presumably holds queued events on the agent's JVM heap, so a
# capacity of 50,000,000 events may need a correspondingly large heap - verify against the
# agent's -Xmx setting and typical event size.
flume-config-name.channels.channel-flume-config.capacity=50000000
flume-config-name.channels.channel-flume-config.transactionCapacity=800000

# Attach the source to its channel - note that the key here is the plural "channels"
flume-config-name.sources.source-flume-config.channels = channel-flume-config
# Attach the sink to its channel - note that the key here is the singular "channel"
flume-config-name.sinks.sink-flume-config.channel = channel-flume-config
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值