kafka_14_集成 flume

最新推荐文章于 2024-10-01 16:01:52 发布

SOUL-GIN

最新推荐文章于 2024-10-01 16:01:52 发布

阅读量151

点赞数

分类专栏： kafka 文章标签： kafka

本文链接：https://blog.csdn.net/zxczb/article/details/108305569

版权

kafka 专栏收录该内容

14 篇文章 0 订阅

订阅专栏

前置条件

kafka 集群

软件信息

网盘下载地址
链接：https://pan.baidu.com/s/1b5DjLQBmbz_8Nh7DyLj2BQ
提取码：gin1

install

当前安装演示版本
apache-flume-1.9.0-bin.tar.gz
install

# 解压
tar -zxf apache-flume-1.9.0-bin.tar.gz -C /home/kafka/
# 查看
cd /home/kafka/apache-flume-1.9.0-bin/
ll
# 修改启动配置(可选)
vim flume-env.sh
# jdk路径
export JAVA_HOME=/opt/software/jdk/jdk1.8.0_192
# jvm参数, 堆外内存设置
export JAVA_OPTS="-Xms256m -Xmx1024m -XX:MaxDirectMemorySize=1024m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+HeapDumpOnOutOfMemoryError"

# 创建kafka消费flume收集信息的配置文件
cd conf
vim kafka.properties

# 添加如下配置信息
# Name the components on this agent
# define component name(input channel/pip output)
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
# input
a1.sources.r1.type = avro
# alter begin 输入来源的hostname
a1.sources.r1.bind = kafka01
# alter end
a1.sources.r1.port = 44444

# Use a channel which buffers events in memory
# middle pip
a1.channels.c1.type = memory
a1.channels.c1.capacity = 10000
a1.channels.c1.transactionCapacity = 10000
a1.channels.c1.byteCapacityBufferPercentage = 20
a1.channels.c1.byteCapacity = 800000

# Describe the sink
# output
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
# alter begin 输出到哪个kafka的集群和tpoic
a1.sinks.k1.kafka.topic = topic01
a1.sinks.k1.kafka.bootstrap.servers = kafka01:9092,kafka02:9092,kafka03:9092
# alter end
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = -1
a1.sinks.k1.kafka.producer.linger.ms = 1
a1.sinks.k1.kafka.producer.compression.type = snappy

# Use a channel which buffers events in memory
# binding policy
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

测试

使用 eagle 来演示, 清理topic01(有则删除, 重新创建, 可以直接用kafka命令处理)
http://192.168.25.106:8048/ke
Account: admin
Password: 123456
删除topic密码
keadmin
topic下面的 list 标签页
http://192.168.25.106:8048/ke/topic/list
点击remove(对应topic01), 输入 keadmin
topic下面的 create 标签页
http://192.168.25.106:8048/ke/topic/create
创建 tpoic01 , 分区=3, 副本=1
查看topic01是否有数据
http://192.168.25.106:8048/ke/topic/message
select * from “topic01” where “partition” in (0,1,2) limit 10
启动kafka消费端

cd /home/kafka/kafka_2.11-2.2.0/bin
# 监听消费
./kafka-console-consumer.sh --bootstrap-server kafka01:9092,kafka02:9092,kafka03:9092 --topic topic01

启动 flume

/home/kafka/apache-flume-1.9.0-bin/bin
# 启动, -n 指定agent名称, 和配置文件(a1.sources 前缀)保持一致
# -D 设置日志级别
bin/flume-ng agent -c conf/ -n a1 -f conf/kafka.properties -Dflume.root.logger=INFO,console
# 正常启动后不要关闭, 新开一个shell窗口操作

启动 flume avro-client 测试

# 切换目录
/home/kafka/apache-flume-1.9.0-bin/bin

# 生成测试文件
cat > testKafkaFlumeImput.log << 'EOF'
1a
2b
3c
4d
5e
66gin
77nier
88luffy
99naruto
EOF

# 启动, avro-client, 指定输入的 hostname port, 和flume 配置文件(kafka.properties中的a1.sources.r1)保持一致, --filename 指定输入文件
./flume-ng avro-client -c ../conf/ --host kafka01 --port 44444 --filename ./testKafkaFlumeImput.log
# 可以看到 kafka 刚启动的消费端消费了 testKafkaFlumeImput.log 中的数据