一、部署
1.下载安装包
http://archive.apache.org/dist/flume/
2.创建目录
# mkdir has no -r option (the original command errors out);
# -p creates missing parent directories and is a no-op if the path exists.
mkdir -p /home/sxvbd/bigdata
3.上传flume包到目录下
apache-flume-1.9.0-bin.tar.gz
4.解压并修改名称
# Extract the distribution, then rename it: the step title says
# "解压并修改名称" (extract and rename), and the later steps reference
# /home/sxvbd/bigdata/flume-1.9.0 — the rename command was missing.
tar -zxvf apache-flume-1.9.0-bin.tar.gz
mv apache-flume-1.9.0-bin flume-1.9.0
5.在/etc/profile中配置flume路径
# Flume installation root (the directory the tarball was renamed to).
export FLUME_HOME=/home/sxvbd/bigdata/flume-1.9.0
# Make the flume-ng launcher available on the command line.
export PATH="${PATH}:${FLUME_HOME}/bin"
# Put Flume's jars on the Java classpath.
export CLASSPATH=".:${FLUME_HOME}/lib"
6.使配置生效
# Reload /etc/profile so FLUME_HOME/PATH/CLASSPATH take effect in this shell.
source /etc/profile
7.进入flume配置文件目录
# Work from Flume's configuration directory for the following steps.
cd /home/sxvbd/bigdata/flume-1.9.0/conf
8.由模板复制生成配置文件
# Flume ships *.template files only; copy them to the active file names
# that flume-ng actually loads.
cp flume-conf.properties.template flume-conf.properties
cp flume-env.sh.template flume-env.sh
9.修改配置flume-env.sh
# Tell Flume which JDK to run on.
# NOTE(review): adjust this path to match the JDK actually installed on the host.
export JAVA_HOME=/usr/local/jdk1.8.0_333
10.创建需要采集的测试文件目录
# mkdir has no -r option (the original command errors out);
# -p creates missing parents and succeeds if the directory already exists.
mkdir -p /home/sxvbd/bigdata/flumeTestDir
11.修改配置文件flume-conf.properties
# Agent "agent": spooling-directory source -> memory channel -> Kafka sink.
agent.sources = source1
agent.channels = channel1
agent.sinks = sink1

# Source: watch a directory; any new file appearing there is ingested.
# NOTE(review): the spooldir source has no bind/port properties — the
# original bind/port lines were netcat-source leftovers that Flume
# silently ignored, so they are removed here.
agent.sources.source1.type = spooldir
agent.sources.source1.spoolDir = /home/sxvbd/bigdata/flumeTestDir/

# Channel: in-memory buffer between source and sink.
agent.channels.channel1.type = memory
agent.channels.channel1.capacity = 1000000
agent.channels.channel1.transactionCapacity = 10000
agent.channels.channel1.keep-alive = 60

# Sink: publish events to the Kafka cluster / topic below.
agent.sinks.sink1.type = org.apache.flume.sink.kafka.KafkaSink
agent.sinks.sink1.kafka.bootstrap.servers = node24:9092,node25:9092,node26:9092
agent.sinks.sink1.kafka.topic = data-ncm-hljk-topic
# Write raw event bodies (not Avro FlumeEvent wrappers) to Kafka.
# NOTE(review): the KafkaSink property is useFlumeEventFormat;
# parseAsFlumeEvent belongs to the Kafka *channel*, so the original
# line was a no-op. false is also the default, kept here for clarity.
agent.sinks.sink1.useFlumeEventFormat = false

# Wire source and sink to the channel.
agent.sources.source1.channels = channel1
agent.sinks.sink1.channel = channel1
12.启动flume
# Start the agent in the background. The original mixed a relative
# launcher path (../bin) and a relative --conf dir with an absolute -f
# path, so it only worked from one specific cwd; use $FLUME_HOME
# (exported in /etc/profile above) for all three.
# -n agent must match the property prefix ("agent.") in the config file.
nohup "$FLUME_HOME/bin/flume-ng" agent \
  --conf "$FLUME_HOME/conf" \
  -f "$FLUME_HOME/conf/flume-conf.properties" \
  -n agent \
  -Dflume.root.logger=INFO,console > flume.log 2>&1 &