1. Download and extract Flume
cd /home/hadoop/app
tar -zxvf apache-flume-1.10.0-bin.tar.gz
rm apache-flume-1.10.0-bin.tar.gz
ln -s apache-flume-1.10.0-bin flume
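To confirm the layout, you can have Flume print its version (this assumes the flume symlink created above; the exact output depends on your build):
cd /home/hadoop/app/flume
bin/flume-ng version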
2. Edit the Flume configuration file
Watch a directory for new files; whenever one appears, read it and send its contents to Kafka.
cd /home/hadoop/app/flume/conf
vi a4.properties
# Define the agent name (a4) and the names of its source, channel, and sink
a4.sources = r1
a4.channels = c1
a4.sinks = k1
# Configure the source
a4.sources.r1.type = spooldir
# Watch this directory for new files
a4.sources.r1.spoolDir = /home/hadoop/app/flume/flumelogs
# Configure the channel
a4.channels.c1.type = memory
a4.channels.c1.capacity = 1000
a4.channels.c1.transactionCapacity = 100
# Configure the sink
a4.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
# The topic mydemo2 must be created on Kafka ahead of time (see step 3)
a4.sinks.k1.kafka.topic = mydemo2
a4.sinks.k1.kafka.bootstrap.servers = hadoop01:9092,hadoop02:9092,hadoop03:9092
a4.sinks.k1.kafka.producer.acks = 1
a4.sinks.k1.flumeBatchSize = 20
# Wire the source and sink to the channel
a4.sources.r1.channels = c1
a4.sinks.k1.channel = c1
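The spooling directory source expects the watched directory to exist before the agent starts, so create it now (the path matches the spoolDir value above):
mkdir -p /home/hadoop/app/flume/flumelogs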
3. Start a Kafka consumer
cd /home/hadoop/app/kafka/bin
# Create the topic
kafka-topics.sh --bootstrap-server hadoop01:9092 --create --topic mydemo2 --replication-factor 3 --partitions 3
# Start a console consumer
kafka-console-consumer.sh --bootstrap-server hadoop01:9092,hadoop02:9092,hadoop03:9092 --topic mydemo2
# Start a console producer
kafka-console-producer.sh --bootstrap-server hadoop01:9092 --topic mydemo2
# If the cluster is healthy, messages typed into the producer appear in the consumer console
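Optionally, before pointing Flume at the topic, you can confirm it was created with the expected partition and replica counts (same kafka-topics.sh tool and bootstrap server as above):
kafka-topics.sh --bootstrap-server hadoop01:9092 --describe --topic mydemo2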
4. Start Flume and test
cd /home/hadoop/app/flume
# Start the Flume agent; the -n value must match the agent name (a4) used in the config
bin/flume-ng agent -n a4 -c conf -f conf/a4.properties -Dflume.root.logger=INFO,console
# In another directory, create dj.txt with the following content
fas
fdsa
fas123
# Copy dj.txt into the directory Flume is watching
cp dj.txt /home/hadoop/app/flume/flumelogs/
# The contents of the file should now be printed in the Kafka consumer console
fas
fdsa
fas123
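As a sanity check on the Flume side, the spooling directory source renames each file it has finished ingesting, adding a .COMPLETED suffix by default, so a quick listing shows whether dj.txt was picked up:
ls /home/hadoop/app/flume/flumelogs
# expect something like: dj.txt.COMPLETED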