Write the configuration file
vim dir_kafka.conf
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Source configuration, option 1: a spooldir source that watches a directory for new files
# (both options below configure r1, so comment one block out; as written, the exec block further down overrides this one)
a1.sources.r1.type = spooldir
# note: spoolDir must be an existing directory that Flume can rename files in, not a single file
a1.sources.r1.spoolDir = /opt/access.log
a1.sources.r1.fileSuffix = .FINISHED
a1.sources.r1.deserializer.maxLineLength = 5120
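A quick way to exercise the spooldir source is to drop a completed file into the watched directory; Flume ingests it and renames it with the configured suffix. A minimal sketch, assuming spoolDir points at a real directory and the sample file path is illustrative:

mkdir -p /opt/access.log                  # the spool directory must exist before the agent starts
cp /tmp/sample.log /opt/access.log/       # drop a finished file; never append to a file already in the directory
ls /opt/access.log                        # processed files gain the .FINISHED suffix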
# Source configuration, option 2: an exec source that tails new data appended to a file
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /opt/log/access.log
a1.sources.r1.shell = /bin/sh -c
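Setting shell = /bin/sh -c runs the command through a shell, which is what allows shell features such as wildcards and pipes inside command. A sketch of what that enables (the glob and the filter are illustrative, not part of the original config):

# with a shell configured, the command may use globs ...
a1.sources.r1.command = tail -F /opt/log/*.log
# ... or pipes to filter before ingesting (pick one; the last assignment wins)
a1.sources.r1.command = tail -F /opt/log/access.log | grep -v DEBUG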
# Sink configuration: use Kafka as the destination for the data
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.topic = kk7
a1.sinks.k1.brokerList = 192.168.88.11:9092,192.168.88.12:9092,192.168.88.13:9092
a1.sinks.k1.requiredAcks = 1
a1.sinks.k1.batchSize = 20
# note: Flume 1.7+ prefers kafka.topic, kafka.bootstrap.servers, kafka.producer.acks and flumeBatchSize;
# the legacy names above are still accepted for backward compatibility
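Before starting the agent it helps to create the topic and watch it with a console consumer, so the sink's output is visible immediately. A sketch assuming the Kafka scripts are on the PATH; the --bootstrap-server flag works on Kafka 2.2+, while older releases create topics with --zookeeper <zk-host>:2181 instead:

kafka-topics.sh --create --topic kk7 --bootstrap-server 192.168.88.11:9092 --partitions 3 --replication-factor 2
kafka-console-consumer.sh --topic kk7 --bootstrap-server 192.168.88.11:9092 --from-beginning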
# Channel configuration: use an in-memory buffer as the temporary event cache
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
# transactionCapacity must be at least as large as the sink's batchSize (20 here)
a1.channels.c1.transactionCapacity = 100
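A memory channel loses buffered events if the agent dies. If durability matters more than throughput, a file channel is a drop-in alternative; a minimal sketch, with the directory paths as assumptions:

a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /usr/local/flume/checkpoint
a1.channels.c1.dataDirs = /usr/local/flume/data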
# Wire source r1 and sink k1 together through channel c1
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
Flume startup command (the -f path must point at the dir_kafka.conf written above, not a different config):
./flume-ng agent -c /usr/local/flume/conf -f /usr/local/flume/conf/dir_kafka.conf -n a1 -Dflume.root.logger=INFO,console
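With the agent running and the console consumer from the earlier sketch open, an end-to-end check is to generate one event and watch it arrive (assuming the exec source option is the active one):

echo "hello kafka $(date)" >> /opt/log/access.log
# the same line should appear in the kafka-console-consumer window within a few seconds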