1. VM installation (omitted)
2. Hadoop 2.7.2 (omitted) ---- for detailed configuration see my blog post on the fully distributed Hadoop 2.7.2 installation and verification on CentOS 7 (VM, x86): a 3-node Hadoop cluster with 2 replicas
3. Network layout: 192.168.1.150, 192.168.1.151, 192.168.1.152
Upload Flume 1.4.0 to each of the three nodes and extract it.
Note: edit the environment variables in flume-env.sh:
# Environment variables can be set here.
JAVA_HOME=/root/java/jdk1.8.0_77
# Give Flume more memory and pre-allocate, enable remote monitoring via JMX
#JAVA_OPTS="-Xms100m -Xmx200m -Dcom.sun.management.jmxremote"
# Note that the Flume conf directory is always included in the classpath.
FLUME_CLASSPATH=/root/flume/apache-flume-1.4.0-bin/lib
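With flume-env.sh in place, a quick sanity check that the unpacked distribution and the JDK are wired up correctly is the bundled version command (run from the apache-flume-1.4.0-bin directory):

bin/flume-ng version

If JAVA_HOME is wrong, this fails immediately rather than at agent startup.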
192.168.1.150 acts as the consolidation (collector) node.
[root@namenode flume]# cd
[root@namenode ~]# mkdir flume
[root@namenode ~]# cd flume/
[root@namenode flume]# tar -zxvf apache-flume-1.4.0-bin.tar.gz
[root@namenode flume]# cd apache-flume-1.4.0-bin/
[root@namenode apache-flume-1.4.0-bin]# ll
total 176
drwxr-xr-x. 2 501 games 21 Dec 21 21:21 bin
-rw-r--r--. 1 501 games 51637 Jun 25 2013 CHANGELOG
drwxr-xr-x. 2 501 games 4096 Dec 22 14:11 conf
-rw-r--r--. 1 501 games 6172 Mar 14 2013 DEVNOTES
drwxr-xr-x. 10 501 games 4096 Jun 25 2013 docs
drwxr-xr-x. 2 root root 16384 Dec 22 10:31 lib
-rw-r--r--. 1 501 games 73739 Jun 24 2013 LICENSE
drwxr-xr-x. 2 root root 22 Dec 21 21:41 logs
-rw-r--r--. 1 501 games 249 Mar 14 2013 NOTICE
-rw-r--r--. 1 501 games 1779 Mar 14 2013 README
-rw-r--r--. 1 501 games 1586 Jun 25 2013 RELEASE-NOTES
drwxr-xr-x. 2 root root 67 Dec 21 21:21 tools
[root@namenode apache-flume-1.4.0-bin]# cd conf/
[root@namenode conf]# vi consolidation.conf
The configuration is as follows:
#flumeConsolidationAgent
flumeConsolidationAgent.channels = c1
flumeConsolidationAgent.sources = s1
flumeConsolidationAgent.sinks = k1
#flumeConsolidationAgent Avro Source
# Note (4)
flumeConsolidationAgent.sources.s1.type = avro
flumeConsolidationAgent.sources.s1.channels = c1
flumeConsolidationAgent.sources.s1.bind = 192.168.1.150
flumeConsolidationAgent.sources.s1.port = 44444
#flumeConsolidationAgent FileChannel
flumeConsolidationAgent.channels.c1.type = file
flumeConsolidationAgent.channels.c1.checkpointDir = /var/flume/spool/checkpoint
flumeConsolidationAgent.channels.c1.dataDirs = /var/flume/spool/data
flumeConsolidationAgent.channels.c1.capacity = 200000000
flumeConsolidationAgent.channels.c1.keep-alive = 30
flumeConsolidationAgent.channels.c1.write-timeout = 30
flumeConsolidationAgent.channels.c1.checkpoint-timeout = 600
##flumeConsolidationAgent Memory Channel
#flumeConsolidationAgent.channels.c1.type = memory
#flumeConsolidationAgent.channels.c1.capacity = 10000
#flumeConsolidationAgent.channels.c1.transactionCapacity = 10000
#flumeConsolidationAgent.channels.c1.byteCapacityBufferPercentage = 20
#flumeConsolidationAgent.channels.c1.byteCapacity = 800000
#flumeConsolidationAgent Sinks
# Note (5)
flumeConsolidationAgent.sinks.k1.channel = c1
flumeConsolidationAgent.sinks.k1.type = file_roll
flumeConsolidationAgent.sinks.k1.sink.directory = /var/tmp
flumeConsolidationAgent.sinks.k1.sink.rollInterval = 3600
flumeConsolidationAgent.sinks.k1.batchSize = 10000
#flumeConsolidationAgent.sinks.k1.type = hdfs
#flumeConsolidationAgent.sinks.k1.hdfs.path = hdfs://192.168.1.150:9000/mycluster/flume/%Y-%m-%d
:wq!
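For reference, the commented-out HDFS sink above would need a few more properties before it could replace file_roll. A minimal sketch, left commented out like the original; the roll settings and the useLocalTimeStamp flag are assumptions to tune for your cluster (the %Y-%m-%d escape in the path needs a timestamp, which useLocalTimeStamp supplies from the agent's clock):

#flumeConsolidationAgent.sinks.k1.type = hdfs
#flumeConsolidationAgent.sinks.k1.hdfs.path = hdfs://192.168.1.150:9000/mycluster/flume/%Y-%m-%d
# use the agent's clock instead of requiring a timestamp interceptor
#flumeConsolidationAgent.sinks.k1.hdfs.useLocalTimeStamp = true
# write plain text rather than the default SequenceFile
#flumeConsolidationAgent.sinks.k1.hdfs.fileType = DataStream
# roll once an hour; disable size- and count-based rolling
#flumeConsolidationAgent.sinks.k1.hdfs.rollInterval = 3600
#flumeConsolidationAgent.sinks.k1.hdfs.rollSize = 0
#flumeConsolidationAgent.sinks.k1.hdfs.rollCount = 0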
Nodes 151 and 152 are configured identically. Under flume/conf/ create a new file agent.conf:
#agent
flumeAgent.channels = c1
flumeAgent.sources = s1
flumeAgent.sinks = k1
#flumeAgent Spooling Directory Source
# Note (1)
flumeAgent.sources.s1.type = spooldir
flumeAgent.sources.s1.spoolDir = /usr/logs/
flumeAgent.sources.s1.fileHeader = true
flumeAgent.sources.s1.deletePolicy = immediate
flumeAgent.sources.s1.batchSize = 1000
flumeAgent.sources.s1.channels = c1
flumeAgent.sources.s1.deserializer.maxLineLength = 1048576
#flumeAgent FileChannel
# Note (2)
flumeAgent.channels.c1.type = file
flumeAgent.channels.c1.checkpointDir = /var/flume/spool/checkpoint
flumeAgent.channels.c1.dataDirs = /var/flume/spool/data
flumeAgent.channels.c1.capacity = 200000000
flumeAgent.channels.c1.keep-alive = 30
flumeAgent.channels.c1.write-timeout = 30
flumeAgent.channels.c1.checkpoint-timeout = 600
# flumeAgent Sinks
# Note (3)
flumeAgent.sinks.k1.channel = c1
flumeAgent.sinks.k1.type = avro
# connect to CollectorMainAgent
flumeAgent.sinks.k1.hostname = 192.168.1.150
flumeAgent.sinks.k1.port = 44444
:wq!
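Before starting anything, make sure the directories referenced in the configs exist on each node. Flume will generally create the FileChannel directories itself, but the spooldir source errors out if its directory is missing, and creating everything up front avoids permission surprises:

mkdir -p /usr/logs /var/flume/spool/checkpoint /var/flume/spool/data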
Start command on 150 (start this node first):
bin/flume-ng agent --conf conf --conf-file conf/consolidation.conf --name flumeConsolidationAgent -Dflume.root.logger=DEBUG,console
Start command on 151 and 152:
bin/flume-ng agent --conf conf --conf-file conf/agent.conf --name flumeAgent -Dflume.root.logger=DEBUG,console
Three log levels are useful here: DEBUG, INFO, and ERROR; moving from DEBUG toward ERROR the output becomes progressively less verbose, reporting only increasingly severe events.
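For example, once the pipeline works you can switch from DEBUG on the console to quieter file-based logging; the stock conf/log4j.properties ships with a LOGFILE appender writing to flume.log (worth verifying in your copy):

bin/flume-ng agent --conf conf --conf-file conf/agent.conf --name flumeAgent -Dflume.root.logger=INFO,LOGFILE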
Note (1): The source type is spooldir, which watches a directory and ingests any file that appears in it. A file being ingested must not be opened or edited again. Here deletePolicy is set to immediate, so each file is deleted as soon as it has been consumed. One problem I ran into: manually copying a file of roughly 20 MB or more into the directory crashed the agent, while files placed there before startup were handled fine. The cause turned out to be that the copy is far slower than the ingestion, so Flume reads a file that is still being written, treats it as a file modified mid-read, and errors out.
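A simple workaround is to stage the file outside the spool directory and then mv it in; as long as both paths are on the same filesystem, mv is an atomic rename, so the source never sees a half-written file (/usr/logs_staging is a hypothetical staging path):

mkdir -p /usr/logs_staging
cp /path/to/app.log /usr/logs_staging/
# atomic rename into the watched directory
mv /usr/logs_staging/app.log /usr/logs/app.log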
Note (2): The channel type is file.
MemoryChannel: all events are held in memory. Pro: high throughput. Con: limited capacity, and any events still in memory are lost if the agent dies.
FileChannel: all events are persisted to files on disk. Pro: larger capacity, and data survives a crash. Con: slower.
Note (3): The sink type is avro. The hostname must point to the consolidation node's IP, and the port can be set to anything that does not conflict. This does create one problem: when flumeConsolidationAgent on 150 is restarted, it can fail with an "Address already in use" error; either change the port or kill the stale process.
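If you hit that error, find the process still bound to the port and kill it before restarting (netstat -p needs the net-tools package on CentOS 7):

netstat -anp | grep 44444    # note the PID of the old java process
kill <pid-from-netstat>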
Note (4): The consolidation agent must bind to its own local IP, and its port must match the one configured in the sinks on 151 and 152.
Note (5): Here the collected files are written to the local filesystem rather than HDFS, since writing to HDFS still requires installing Hadoop. Also pay close attention to the channel property of every sink: in x.sinks.k1.channel = c1, the property name must never take an s.
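To make the distinction concrete, these two lines from the configs above are both correct; a source can fan out to one or more channels (plural), while a sink drains exactly one channel (singular):

flumeAgent.sources.s1.channels = c1
flumeAgent.sinks.k1.channel = c1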
As configured, add a file with some content under /usr/logs/ on an agent node, then check the collected content under /var/tmp/ on 150.
[root@datanode1 logs]# vi log2.txt
fdad
afda
df
afa
dfa
sdf
[root@namenode tmp]# cat 1482387239808-1
fdad
afda
df
afa
dfa
sdf
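You can also bypass the spooldir agents entirely and test the collector's avro source on its own with the bundled avro-client (here /tmp/test.txt is just a hypothetical sample file; its lines should likewise appear in a roll file under /var/tmp on 150):

bin/flume-ng avro-client --conf conf -H 192.168.1.150 -p 44444 -F /tmp/test.txt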