1)下载解压到指定目录
2)修改 flume-env.sh 配置文件,主要是JAVA_HOME变量设置
1.root@m1:/home/hadoop/flume-1.5.0-bin# cp conf/flume-env.sh.template conf/flume-env.sh
2.root@m1:/home/hadoop/flume-1.5.0-bin# vi conf/flume-env.sh
3.# Licensed to the Apache Software Foundation (ASF) under one
4.# or more contributor license agreements. See the NOTICE file
5.# distributed with this work for additional information
6.# regarding copyright ownership. The ASF licenses this file
7.# to you under the Apache License, Version 2.0 (the
8.# "License"); you may not use this file except in compliance
9.# with the License. You may obtain a copy of the License at
10.#
11.# http://www.apache.org/licenses/LICENSE-2.0
12.#
13.# Unless required by applicable law or agreed to in writing, software
14.# distributed under the License is distributed on an "AS IS" BASIS,
15.# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16.# See the License for the specific language governing permissions and
17.# limitations under the License.
18.
19.# If this file is placed at FLUME_CONF_DIR/flume-env.sh, it will be sourced
20.# during Flume startup.
21.
22.# Enviroment variables can be set here.
23.
24.JAVA_HOME=/usr/lib/jvm/java-7-oracle
25.
26.# Give Flume more memory and pre-allocate, enable remote monitoring via JMX
27.#JAVA_OPTS="-Xms100m -Xmx200m -Dcom.sun.management.jmxremote"
28.
29.# Note that the Flume conf directory is always included in the classpath.
30.#FLUME_CLASSPATH=""
复制代码
3)验证是否安装成功
1.root@m1:/home/hadoop# /home/hadoop/flume-1.5.0-bin/bin/flume-ng version
2.Flume 1.5.0
3.Source code repository: https://git-wip-us.apache.org/repos/asf/flume.git
4.Revision: 8633220df808c4cd0c13d1cf0320454a94f1ea97
5.Compiled by hshreedharan on Wed May 7 14:49:18 PDT 2014
6.From source with checksum a01fe726e4380ba0c9f7a7d222db961f
7.root@m1:/home/hadoop#
复制代码
出现上面的信息,表示安装成功了
五、flume的案例
1)案例1:Avro
Avro可以发送一个给定的文件给Flume,Avro 源使用AVRO RPC机制。
a)创建agent配置文件
- root@m1:/home/hadoop# vi /home/hadoop/flume-1.5.0-bin/conf/avro.conf
-
- a1.sources = r1
- a1.sinks = k1
- a1.channels = c1
-
- # Describe/configure the source
- a1.sources.r1.type = avro
- a1.sources.r1.channels = c1
- a1.sources.r1.bind = 0.0.0.0
- a1.sources.r1.port = 4141
-
- # Describe the sink
- a1.sinks.k1.type = logger
-
- # Use a channel which buffers events in memory
- a1.channels.c1.type = memory
- a1.channels.c1.capacity = 1000
- a1.channels.c1.transactionCapacity = 100
-
- # Bind the source and sink to the channel
- a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
复制代码
b)启动flume agent a1
root@m1:/home/hadoop# /home/hadoop/flume-1.5.0-bin/bin/flume-ng agent -c . -f /home/hadoop/flume-1.5.0-bin/conf/avro.conf -n a1 -Dflume.root.logger=INFO,console
复制代码
c)创建指定文件
root@m1:/home/hadoop# echo "hello world" > /home/hadoop/flume-1.5.0-bin/log.00
复制代码
d)使用avro-client发送文件
root@m1:/home/hadoop# /home/hadoop/flume-1.5.0-bin/bin/flume-ng avro-client -c . -H m1 -p 4141 -F /home/hadoop/flume-1.5.0-bin/log.00
复制代码
d)使用avro-client发送文件
root@m1:/home/hadoop# /home/hadoop/flume-1.5.0-bin/bin/flume-ng avro-client -c . -H m1 -p 4141 -F /home/hadoop/flume-1.5.0-bin/log.00
复制代码
f)在m1的控制台,可以看到以下信息,注意最后一行:
root@m1:/home/hadoop/flume-1.5.0-bin/conf# /home/hadoop/flume-1.5.0-bin/bin/flume-ng agent -c . -f /home/hadoop/flume-1.5.0-bin/conf/avro.conf -n a1 -Dflume.root.logger=INFO,console
- Info: Sourcing environment configuration script /home/hadoop/flume-1.5.0-bin/conf/flume-env.sh
- Info: Including Hadoop libraries found via (/home/hadoop/hadoop-2.2.0/bin/hadoop) for HDFS access
- Info: Excluding /home/hadoop/hadoop-2.2.0/share/hadoop/common/lib/slf4j-api-1.7.5.jar from classpath
- Info: Excluding /home/hadoop/hadoop-2.2.0/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar from classpath
- …
- 2014-08-10 10:43:25,112 (New I/O worker #1) [INFO - org.apache.avro.ipc.NettyServer$NettyServerAvroHandler.handleUpstream(NettyServer.java:171)] [id: 0x92464c4f, /192.168.1.50:59850 :> /192.168.1.50:4141] UNBOUND
- 2014-08-10 10:43:25,112 (New I/O worker #1) [INFO - org.apache.avro.ipc.NettyServer$NettyServerAvroHandler.handleUpstream(NettyServer.java:171)] [id: 0x92464c4f, /192.168.1.50:59850 :> /192.168.1.50:4141] CLOSED
- 2014-08-10 10:43:25,112 (New I/O worker #1) [INFO - org.apache.avro.ipc.NettyServer$NettyServerAvroHandler.channelClosed(NettyServer.java:209)] Connection to /192.168.1.50:59850 disconnected.
- 2014-08-10 10:43:26,718 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{} body: 68 65 6C 6C 6F 20 77 6F 72 6C 64 hello world }
复制代码
2)案例2:Spool
Spool监测配置的目录下新增的文件,并将文件中的数据读取出来。需要注意两点:
1) 拷贝到spool目录下的文件不可以再打开编辑。
2) spool目录下不可包含相应的子目录
a)创建agent配置文件
root@m1:/home/hadoop# vi /home/hadoop/flume-1.5.0-bin/conf/spool.conf
-
- a1.sources = r1
- a1.sinks = k1
- a1.channels = c1
-
- # Describe/configure the source
- a1.sources.r1.type = spooldir
- a1.sources.r1.channels = c1
- a1.sources.r1.spoolDir = /home/hadoop/flume-1.5.0-bin/logs
- a1.sources.r1.fileHeader = true
-
- # Describe the sink
- a1.sinks.k1.type = logger
-
- # Use a channel which buffers events in memory
- a1.channels.c1.type = memory
- a1.channels.c1.capacity = 1000
- a1.channels.c1.transactionCapacity = 100
-
- # Bind the source and sink to the channel
- a1.sources.r1.channels = c1
- a1.sinks.k1.channel = c1
复制代码
b)启动flume agent a1
root@m1:/home/hadoop# /home/hadoop/flume-1.5.0-bin/bin/flume-ng agent -c . -f /home/hadoop/flume-1.5.0-bin/conf/spool.conf -n a1 -Dflume.root.logger=INFO,console
复制代码
c)追加文件到/home/hadoop/flume-1.5.0-bin/logs目录
root@m1:/home/hadoop# echo "spool test1" > /home/hadoop/flume-1.5.0-bin/logs/spool_text.log
复制代码
d)在m1的控制台,可以看到以下相关信息:
14/08/10 11:37:13 INFO source.SpoolDirectorySource: Spooling Directory Source runner has shutdown.
- 14/08/10 11:37:13 INFO source.SpoolDirectorySource: Spooling Directory Source runner has shutdown.
- 14/08/10 11:37:14 INFO avro.ReliableSpoolingFileEventReader: Preparing to move file /home/hadoop/flume-1.5.0-bin/logs/spool_text.log to /home/hadoop/flume-1.5.0-bin/logs/spool_text.log.COMPLETED
- 14/08/10 11:37:14 INFO source.SpoolDirectorySource: Spooling Directory Source runner has shutdown.
- 14/08/10 11:37:14 INFO source.SpoolDirectorySource: Spooling Directory Source runner has shutdown.
- 14/08/10 11:37:14 INFO sink.LoggerSink: Event: { headers:{file=/home/hadoop/flume-1.5.0-bin/logs/spool_text.log} body: 73 70 6F 6F 6C 20 74 65 73 74 31 spool test1 }
- 14/08/10 11:37:15 INFO source.SpoolDirectorySource: Spooling Directory Source runner has shutdown.
- 14/08/10 11:37:15 INFO source.SpoolDirectorySource: Spooling Directory Source runner has shutdown.
- 14/08/10 11:37:16 INFO source.SpoolDirectorySource: Spooling Directory Source runner has shutdown.
- 14/08/10 11:37:16 INFO source.SpoolDirectorySource: Spooling Directory Source runner has shutdown.
14/08/10 11:37:17 INFO source.SpoolDirectorySource: Spooling Directory Source runner has shutdown.
复制代码
3)案例3:Exec
EXEC执行一个给定的命令获得输出的源,如果要使用tail命令,必须使得file足够大才能看到输出内容
a)创建agent配置文件
root@m1:/home/hadoop# vi /home/hadoop/flume-1.5.0-bin/conf/exec_tail.conf
-
- a1.sources = r1
- a1.sinks = k1
- a1.channels = c1
-
- # Describe/configure the source
- a1.sources.r1.type = exec
- a1.sources.r1.channels = c1
- a1.sources.r1.command = tail -F /home/hadoop/flume-1.5.0-bin/log_exec_tail
-
- # Describe the sink
- a1.sinks.k1.type = logger
-
- # Use a channel which buffers events in memory
- a1.channels.c1.type = memory
- a1.channels.c1.capacity = 1000
- a1.channels.c1.transactionCapacity = 100
-
- # Bind the source and sink to the channel
- a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
复制代码
b)启动flume agent a1
root@m1:/home/hadoop# /home/hadoop/flume-1.5.0-bin/bin/flume-ng agent -c . -f /home/hadoop/flume-1.5.0-bin/conf/exec_tail.conf -n a1 -Dflume.root.logger=INFO,console
复制代码
c)生成足够多的内容在文件里
root@m1:/home/hadoop# for i in {1..100};do echo "exec tail$i" >> /home/hadoop/flume-1.5.0-bin/log_exec_tail;done
复制代码
e)在m1的控制台,可以看到以下信息:
2014-08-10 10:59:25,513 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{} body: 65 78 65 63 20 74 61 69 6C 20 74 65 73 74 exec tail test }
- 2014-08-10 10:59:34,535 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{} body: 65 78 65 63 20 74 61 69 6C 20 74 65 73 74 exec tail test }
- 2014-08-10 11:01:40,557 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{} body: 65 78 65 63 20 74 61 69 6C 31 exec tail1 }
- 2014-08-10 11:01:41,180 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{} body: 65 78 65 63 20 74 61 69 6C 32 exec tail2 }
- 2014-08-10 11:01:41,180 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{} body: 65 78 65