Go into the conf directory under the Flume installation directory:
[root@qiyu01 apache-flume-1.6.0-bin]# cd /opt/modules/apache-flume-1.6.0-bin/conf
Create the Flume configuration file and edit it:
[root@qiyu01 conf]# vi flumeByHDFS_Kafka.conf
File contents (my cluster is an Ambari + HDP setup, so the Kafka broker port is 6667):
# Fan out the same events through two sinks: one to Kafka, one to HDFS
# Name the components on this agent
agent.sources = r1
agent.sinks = k1 k2
agent.channels = c1 c2
# Describe/configure the source
agent.sources.r1.type = exec
agent.sources.r1.command = tail -f /opt/flume_test/flumeByHDFS_Kafka.log
agent.sources.r1.shell = /bin/bash -c
## kafka
# Describe the Kafka sink
agent.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
agent.sinks.k1.topic = flumeByKafka_HDFS
agent.sinks.k1.brokerList = qiyu01.com:6667,qiyu02.com:6667,qiyu03.com:6667
agent.sinks.k1.requiredAcks = 1
agent.sinks.k1.batchSize = 2
# Use a channel which buffers events in memory
agent.channels.c1.type = memory
agent.channels.c1.capacity = 1000
#agent.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
agent.sources.r1.channels = c1 c2
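# r1 is attached to both channels; Flume's default replicating channel
# selector copies every event into c1 and c2, so the Kafka sink and the
# HDFS sink each receive the full stream.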
agent.sinks.k1.channel = c1
## hdfs
# Describe the HDFS sink
agent.sinks.k2.type = hdfs
agent.sinks.k2.hdfs.path = hdfs://qiyu01.com:8020/weblog/%y/%m
agent.sinks.k2.hdfs.fileType=DataStream
agent.sinks.k2.hdfs.writeFormat=Text
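# Roll files by size only: 134217728 bytes = 128 MB; setting
# rollInterval and rollCount to 0 disables time- and count-based rolling.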
agent.sinks.k2.hdfs.rollInterval = 0
agent.sinks.k2.hdfs.rollSize = 134217728
agent.sinks.k2.hdfs.rollCount = 0
agent.sinks.k2.hdfs.batchSize=10
agent.sinks.k2.hdfs.useLocalTimeStamp = true
## Use a channel which buffers events in memory
agent.channels.c2.type = memory
#agent.channels.c2.capacity = 1000
#agent.channels.c2.transactionCapacity = 100
## Bind the source and sink to the channel
#agent.sources.r1.channels = c2
agent.sinks.k2.channel = c2
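Before starting the agent, make sure the file tailed by the exec source exists, otherwise tail -f has nothing to follow. A minimal preparation step, using the path from the config above:
[root@qiyu01 conf]# mkdir -p /opt/flume_test
[root@qiyu01 conf]# touch /opt/flume_test/flumeByHDFS_Kafka.log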
Go back to the Flume installation directory and start the Flume agent:
[root@qiyu01 conf]# cd ..
[root@qiyu01 apache-flume-1.6.0-bin]# bin/flume-ng agent --conf-file conf/flumeByHDFS_Kafka.conf -c conf/ --name agent -Dflume.root.logger=DEBUG,console
Create the Kafka topic:
[root@qiyu01 apache-flume-1.6.0-bin]# cd /usr/hdp/3.0.1.0-187/kafka/
[root@qiyu01 kafka]# bin/kafka-topics.sh --create --zookeeper qiyu01.com:2181,qiyu02.com:2181,qiyu03.com:2181 --replication-factor 1 --partitions 1 --topic flumeByKafka_HDFS
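As an optional sanity check, you can describe the topic to confirm it was created:
[root@qiyu01 kafka]# bin/kafka-topics.sh --describe --zookeeper qiyu01.com:2181 --topic flumeByKafka_HDFS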
Write some test data:
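Any lines appended to the tailed log will be picked up by the exec source; something along these lines works (the message text here is just an example):
[root@qiyu01 ~]# echo "hello flume kafka hdfs $(date)" >> /opt/flume_test/flumeByHDFS_Kafka.log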
Verify the data:
Open a new terminal and, from the Kafka installation directory, run the following command to start a consumer on the topic flumeByKafka_HDFS.
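A console consumer along these lines will do; with the HDP Kafka used here the broker port is 6667 (older Kafka versions use the --zookeeper form instead of --bootstrap-server):
[root@qiyu01 kafka]# bin/kafka-console-consumer.sh --bootstrap-server qiyu01.com:6667 --topic flumeByKafka_HDFS --from-beginning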
Kafka consumer output:
HDFS storage status:
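To check what has landed on HDFS, list the output path from the sink config (the %y/%m directories are filled in from the local timestamp):
[root@qiyu01 ~]# hdfs dfs -ls -R /weblog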
Success! The .tmp file on HDFS is a temporary, in-progress file; Flume rolls it into a finalized, usable file only after it reaches the configured 128 MB (rollSize = 134217728).