Flume itself needs only a JDK installed; it does not depend on a local Hadoop installation.
Data can be collected through multiple chained agents. Each agent consists of:
source: collects the incoming data
channel: temporarily buffers the data
sink: writes the data out to some storage medium
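As a sketch of how two agents chain together (agent names, host, and port here are hypothetical): the first agent forwards events through an avro sink, and the second receives them with an avro source.
# agent a1 on the collecting host (hypothetical names/port)
a1.sinks.k1.type=avro
a1.sinks.k1.hostname=192.168.196.3
a1.sinks.k1.port=4141
# agent a2 on the aggregating host
a2.sources.r1.type=avro
a2.sources.r1.bind=0.0.0.0
a2.sources.r1.port=4141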
After changing the machine's IP, restart the network service:
service network restart
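To confirm the new address took effect (the interface name eth0 is an assumption):
ifconfig eth0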
1. Download from:
http://archive.apache.org/dist/flume/
2. Extract:
tar -zxvf apache-flume-1.5.0-bin.tar.gz -C /master/
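A quick sanity check that the extraction worked:
ls /master/apache-flume-1.5.0-bin
(bin, conf and lib should be among the entries)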
3. Configure:
mv flume-env.sh.template flume-env.sh
vi flume-env.sh
JAVA_HOME=/usr/java/jdk..    (point JAVA_HOME at the JDK install root, not its bin directory)
cd /master/apache-flume-1.5.0-bin/conf
Copy the prepared agent config into the current directory: cp /root/a4.conf .
Contents of a4.conf:
# name the agent's source, channel and sink
a4.sources=r1
a4.channels=c1
a4.sinks=k1
# define the source: watch a spooling directory
a4.sources.r1.type=spooldir
a4.sources.r1.spoolDir=/root/logs
# define the channel: in-memory buffer
a4.channels.c1.type=memory
a4.channels.c1.capacity=10000
a4.channels.c1.transactionCapacity=100
# interceptor that stamps each event with a timestamp header
a4.sources.r1.interceptors=i1
a4.sources.r1.interceptors.i1.type=org.apache.flume.interceptor.TimestampInterceptor$Builder
# define the sink: write to HDFS
a4.sinks.k1.type=hdfs
a4.sinks.k1.hdfs.path=hdfs://ns1/flume/%y%m%d
a4.sinks.k1.hdfs.filePrefix=events-
a4.sinks.k1.hdfs.fileType=DataStream
# do not roll files by event count
a4.sinks.k1.hdfs.rollCount=0
# roll the file once it reaches 128 MB
a4.sinks.k1.hdfs.rollSize=134217728
# roll the file every 60 seconds
a4.sinks.k1.hdfs.rollInterval=60
# wire the source and sink to the channel
a4.sources.r1.channels=c1
a4.sinks.k1.channel=c1
Review or adjust the config as needed: vi a4.conf
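Note that the %y%m%d escapes in hdfs.path are resolved from each event's timestamp header, which is exactly what the interceptor above adds; without it the sink cannot expand the path. An alternative sketch, if you would rather skip the interceptor, is to have the sink use the local machine's clock:
a4.sinks.k1.hdfs.useLocalTimeStamp=true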
4. Start Flume
First copy the Hadoop jars and client configs that the HDFS sink needs over to the Flume host:
cd /master/hadoop-2.2.0/share/hadoop/common/
scp hadoop-common-2.2.0.jar 192.168.196.3:/master/apache-flume-1.5.0-bin/lib
cd /master/hadoop-2.2.0/share/hadoop/common/lib/
scp commons-configuration-1.6.jar 192.168.196.3:/master/apache-flume-1.5.0-bin/lib
scp hadoop-auth-2.2.0.jar 192.168.196.3:/master/apache-flume-1.5.0-bin/lib
cd /master/hadoop-2.2.0/share/hadoop/hdfs/
scp hadoop-hdfs-2.2.0.jar 192.168.196.3:/master/apache-flume-1.5.0-bin/lib
scp /master/hadoop-2.2.0/etc/hadoop/{core-site.xml,hdfs-site.xml} 192.168.196.3:/master/apache-flume-1.5.0-bin/conf
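A quick way to confirm the jars arrived on the Flume host:
ssh 192.168.196.3 'ls /master/apache-flume-1.5.0-bin/lib | grep hadoop'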
Make sure the Flume host can resolve the NameNode hostnames behind the ns1 nameservice (as defined in the copied hdfs-site.xml):
vi /etc/hosts
192.168.196.10 master01
192.168.196.11 master02
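Verify the names resolve from the Flume host, for example:
ping -c 1 master01
ping -c 1 master02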
Start Flume:
mkdir /root/logs    (the spooling directory must exist before the agent starts)
cd /master/apache-flume-1.5.0-bin
bin/flume-ng agent -n a4 -c conf -f conf/a4.conf -Dflume.root.logger=INFO,console
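The command above runs in the foreground and logs to the console. A sketch of a detached variant (same arguments; LOGFILE is the file appender defined in Flume's default log4j.properties):
nohup bin/flume-ng agent -n a4 -c conf -f conf/a4.conf -Dflume.root.logger=INFO,LOGFILE > /dev/null 2>&1 &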
Drop data files into the spooling directory:
cp .......(data file name) /root/logs
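To verify the ingest: the spooldir source renames each consumed file with a .COMPLETED suffix, and the events should show up under the dated HDFS directory (assuming the HDFS client is available on this host):
ls /root/logs
hdfs dfs -ls /flume/$(date +%y%m%d)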