安装
下载
http://flume.apache.org/download.html
http://archive.apache.org/dist/flume/1.8.0/
解压
tar -xzvf apache-flume-1.8.0-bin.tar.gz
建立一个软链接
ln -s apache-flume-1.8.0-bin flume-1.8.0
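设置环境变量(例如在/etc/profile中追加 export FLUME_HOME=/home/hadoop/flume-1.8.0 和 export PATH=$PATH:$FLUME_HOME/bin,然后执行 source /etc/profile 使其生效)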
[root@single ~]# echo $PATH
/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/root/bin:/home/hadoop/hadoop-2.7.3/bin:/home/hadoop/hadoop-2.7.3/sbin:/home/hadoop/hbase-1.2.6/bin:/home/hadoop/zookeeper-3.4.6/bin:/root/bin:/home/hadoop/hadoop-2.7.3/bin:/home/hadoop/hadoop-2.7.3/sbin:/home/hadoop/hbase-1.2.6/bin:/home/hadoop/zookeeper-3.4.6/bin:/home/hadoop/flume-1.8.0/bin
[root@single ~]#
[root@single ~]#
[root@single ~]# flume-ng version
Flume 1.8.0
Source code repository: https://git-wip-us.apache.org/repos/asf/flume.git
Revision: 99f591994468633fc6f8701c5fc53e0214b6da4f
Compiled by denes on Fri Sep 15 14:58:00 CEST 2017
From source with checksum fbb44c8c8fb63a49be0a59e27316833d
配置与运行
(1)在配置文件中描述Source,Channel与Sink的具体实现。
(2)运行一个Agent实例,在运行实例的过程中会读取配置文件的内容,这样Flume就会采集到数据。
- 在$FLUME_HOME/conf下创建my.conf
touch my.conf
- 从整体上描述Agent中的Sources、Sinks、Channels
[root@single conf]# cat my.conf
a1.sources=s1
a1.sinks=k1
a1.channels=c1
- 指定source, sink, channel的属性特征
[root@single conf]# cat my.conf
#指定Agent的组件名称
a1.sources=s1
a1.sinks=k1
a1.channels=c1
#指定source的类型为spoolDir,要监听的路径为/home/hadoop/tmp
a1.sources.s1.type=spoolDir
a1.sources.s1.spoolDir=/home/hadoop/tmp
#指定sink的类型为logger
a1.sinks.k1.type = logger
#指定channel为内存通道,通道的最大容量为1000,单事务一次读写channel的事件最多为100
a