Go into the conf directory under the Flume installation directory:
[root@qiyu01 apache-flume-1.6.0-bin]# cd /opt/modules/apache-flume-1.6.0-bin/conf
Create the Flume configuration file and edit it:
[root@qiyu01 conf]# vi flumeByHDFS_Kafka.conf
File contents (my cluster is an Ambari + HDP setup, so the Kafka broker port is 6667):
# Fan out the same events through two sinks: one to Kafka, one to HDFS
# Name the components on this agent
agent.sources = r1
agent.sinks = k1 k2
agent.channels = c1 c2
# Describe/configure the source
agent.sources.r1.type = exec
agent.sources.r1.command = tail -f /opt/flume_test/flumeByHDFS_Kafka.log
agent.sources.r1.shell = /bin/bash -c
## kafka
# Describe the Kafka sink
agent.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
agent.sinks.k1.topic = flumeByKafka_HDFS
agent.sinks.k1.brokerList = qiyu01.com:6667,qiyu02.com:6667,qiyu03.com:6667
agent.sinks.k1.requiredAcks = 1
agent.sinks.k1.batchSize = 2
# Use a channel which buffers events in memory
agent.channels.c1.type = memory
agent.channels.c1.capacity = 1000
#agent.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
agent.sources.r1.channels = c1 c2
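# r1 is attached to both channels; Flume's default replicating channel
# selector copies every event into c1 and c2, so the Kafka sink and the
# HDFS sink each receive the full stream.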
agent.sinks.k1.channel = c1
## hdfs
# Describe the HDFS sink
agent.sinks.k2.type = hdfs
agent.sinks.k2.hdfs.path = hdfs://qiyu01.com:8020/weblog/%y/%m
agent.sinks.k2.hdfs.fileType=DataStream
agent.sinks.k2.hdfs.writeFormat=Text
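# Roll files by size only: 134217728 bytes = 128 MB; setting
# rollInterval and rollCount to 0 disables time- and count-based rolling.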
agent.sinks.k2.hdfs.rollInterval = 0
agent.sinks.k2.hdfs.rollSize = 134217728
agent.sinks.k2.hdfs.rollCount = 0
agent.sinks.k2.hdfs.batchSize=10
agent.sinks.k2.hdfs.useLocalTimeStamp = true
## Use a channel which buffers events in memory
agent.channels.c2.type = memory
#agent.channels.c2.capacity = 1000
#agent.channels.c2.transactionCapacity = 100
## Bind the source and sink to the channel
#agent.sources.r1.channels = c2
agent.sinks.k2.channel = c2
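Before starting the agent, make sure the file tailed by the exec source exists, otherwise tail -f has nothing to follow. A minimal preparation step, using the path from the config above:
[root@qiyu01 conf]# mkdir -p /opt/flume_test
[root@qiyu01 conf]# touch /opt/flume_test/flumeByHDFS_Kafka.log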
Go back to the Flume installation directory and start the Flume agent:
[root@qiyu01 conf]# cd ..
[root@qiyu01 apache-flume-1.6.0-bin]# bin/flume-ng agent --conf-file conf/flumeByHDFS_Kafka.conf -c conf/ --name agent -Dflume.root.logger=DEBUG,console
Create the Kafka topic:
[root@qiyu01 apache-flume-1.6.0-bin]# cd /usr/hdp/3.0.1.0-187/kafka/
[root@qiyu01 kafka]# bin/kafka-topics.sh --create --zookeeper qiyu01.com:2181,qiyu02.com:2181,qiyu03.com:2181 --replication-factor 1 --partitions 1 --topic flumeByKafka_HDFS
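As an optional sanity check, you can describe the topic to confirm it was created:
[root@qiyu01 kafka]# bin/kafka-topics.sh --describe --zookeeper qiyu01.com:2181 --topic flumeByKafka_HDFS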
Write some test data:
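Any lines appended to the tailed log will be picked up by the exec source; something along these lines works (the message text here is just an example):
[root@qiyu01 ~]# echo "hello flume kafka hdfs $(date)" >> /opt/flume_test/flumeByHDFS_Kafka.log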
Verify the data:
Open a new terminal and, from the Kafka installation directory, run the following command to start a consumer on the topic flumeByKafka_HDFS.
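A console consumer along these lines will do; with the HDP Kafka used here the broker port is 6667 (older Kafka versions use the --zookeeper form instead of --bootstrap-server):
[root@qiyu01 kafka]# bin/kafka-console-consumer.sh --bootstrap-server qiyu01.com:6667 --topic flumeByKafka_HDFS --from-beginning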
Kafka consumer output:
HDFS storage status:
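To check what has landed on HDFS, list the output path from the sink config (the %y/%m directories are filled in from the local timestamp):
[root@qiyu01 ~]# hdfs dfs -ls -R /weblog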
Success! The .tmp file on HDFS is a temporary, in-progress file; Flume rolls it into a finalized, usable file only after it reaches the configured 128 MB (rollSize = 134217728).