Apache Flume
Flume官方用户指南网址: https://flume.apache.org/FlumeUserGuide.html
启动flume
[root@Centos ~]# cd /usr/apache-flume-1.9.0-bin/
[root@Centos apache-flume-1.9.0-bin]# ./bin/flume-ng version
Flume 1.9.0
Source code repository: https://git-wip-us.apache.org/repos/asf/flume.git
Revision: d4fcab4f501d41597bc616921329a4339f73585e
Compiled by fszabo on Mon Dec 17 20:45:25 CET 2018
From source with checksum 35db629a3bda49d23e9b3690c80737f9
File Roll Sink
编写example6.properties文本
[root@Centos conf]# vim example6.properties
# 声明基本组件 Source Channel Sink example6.properties
a1.sources = s1
a1.sinks = sk1
a1.channels = c1
# 配置Source组件, 从Socket中接收文本数据
a1.sources.s1.type = netcat
a1.sources.s1.bind = Centos
a1.sources.s1.port = 44444
# 配置Sink组件, 将接收数据写入本地文件目录
a1.sinks.sk1.type = file_roll
a1.sinks.sk1.sink.directory = /root/baizhi/file_roll
a1.sinks.sk1.sink.rollInterval = 0
# 配置Channel通道, 主要负责数据缓冲
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# 进行组件间的绑定
a1.sources.s1.channels = c1
a1.sinks.sk1.channel = c1
启动flume端文本
[root@Centos apache-flume-1.9.0-bin]# ./bin/flume-ng agent --conf conf/ --name a1 --conf-file conf/example6.properties
netcat给数据
mkdir file_roll
ll
cd file_roll/
ll
telnet Centos 44444
shape of you
jiang siyu
123456
本地文件端结果
cat 1589865708996-1
shape of you
jiang siyu
123456
HDFS Sink
编写example7.properties文本
[root@Centos conf]# vim example7.properties
# 声明基本组件 Source Channel Sink example7.properties
a1.sources = s1
a1.sinks = sk1
a1.channels = c1
# 配置Source组件, 从Socket中接收文本数据
a1.sources.s1.type = netcat
a1.sources.s1.bind = Centos
a1.sources.s1.port = 44444
# 配置Sink组件, 将接收数据写入HDFS
a1.sinks.sk1.type = hdfs
a1.sinks.sk1.hdfs.path = /flume-hdfs/%y-%m-%d
a1.sinks.sk1.hdfs.rollInterval = 0
a1.sinks.sk1.hdfs.rollSize = 0
a1.sinks.sk1.hdfs.rollCount = 0
a1.sinks.sk1.hdfs.useLocalTimeStamp = true
a1.sinks.sk1.hdfs.fileType = DataStream
# 其他常用Sink说明:
# Kafka Sink: 将数据写入Kafka的Topic中
# Avro Sink: 将数据写出给Avro Source
# 配置Channel通道, 主要负责数据缓冲
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# 进行组件间的绑定
a1.sources.s1.channels = c1
a1.sinks.sk1.channel = c1
启动flume端文本
[root@Centos apache-flume-1.9.0-bin]# ./bin/flume-ng agent --conf conf/ --name a1 --conf-file conf/example7.properties
启动测试端
[root@Centos ~]# telnet Centos 44444
Trying 192.168.17.150...
Connected to Centos.
Escape character is '^]'.
jiang love money
OK
jiangsiyu love money
OK
jiang siyu 喜欢 money
OK
得到结果
登录hdfs网页端查看数据
Kafka Sink
创建文本example8.properties
[root@Centos conf]# vim example8.properties
# 声明基本组件 Source Channel Sink example8.properties
a1.sources = s1
a1.sinks = sk1
a1.channels = c1
# 配置Source组件, 从Socket中接收文本数据
a1.sources.s1.type = netcat
a1.sources.s1.bind = Centos
a1.sources.s1.port = 44444
# 配置Sink组件, 将接收数据写入Kafka的Topic
a1.sinks.sk1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.sk1.kafka.bootstrap.servers = Centos:9092
a1.sinks.sk1.kafka.topic = topic01
a1.sinks.sk1.kafka.flumeBatchSize = 20
a1.sinks.sk1.kafka.producer.acks = 1
a1.sinks.sk1.kafka.producer.linger.ms = 1
a1.sinks.sk1.kafka.producer.compression.type = snappy
# 配置Channel通道, 主要负责数据缓冲
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# 进行组件间的绑定
a1.sources.s1.channels = c1
a1.sinks.sk1.channel = c1
启动flume文本
[root@Centos apache-flume-1.9.0-bin]# ./bin/flume-ng agent --conf conf/ --name a1 --conf-file conf/example8.properties
写入flume数据端
[root@Centos ~]# telnet Centos 44444
Trying 192.168.17.150...
Connected to Centos.
Escape character is '^]'.
kafka sink
OK
jiangsiyu 喜欢 money
OK
miss you
OK
kafka消费者输出端结果
[root@Centos kafka_2.11-2.2.0]# ./bin/kafka-console-consumer.sh --bootstrap-server Centos:9092 --topic topic01
kafka sink
jiangsiyu 喜欢 money
miss you