解压安装
解压
#tar -zxf apache-flume-1.6.0-bin.tar.gz -C /opt/apps
重命名
# mv apache-flume-1.6.0-bin flume
配置环境变量
# vi ~/.bashrc
添加
export FLUME_HOME=/opt/apps/flume
export PATH=$FLUME_HOME/bin:$PATH
修改配置文件flume-env.sh
export JAVA_HOME=/opt/apps/jdk
agent实例
在conf目录下新建flume-conf-hdfs.properties文件如下
#####################################################################
## Watch a directory for newly added files and ship them to HDFS.
## This agent consists of one source (r1), one sink (k1),
## and one channel (c1).
##
## "a1" is the name of this Flume agent instance.
#####################################################################
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Source: spooling directory — pick up files dropped into spoolDir.
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = /home/flume/test
# Fully ingested files are renamed with this suffix (since deletePolicy=never).
a1.sources.r1.fileSuffix = .ok
# a1.sources.r1.deletePolicy = immediate
a1.sources.r1.deletePolicy = never
a1.sources.r1.fileHeader = true
# Sink: write collected events to HDFS (logger sink kept for debugging).
#a1.sinks.k1.type = logger
a1.sinks.k1.type=hdfs
a1.sinks.k1.hdfs.useLocalTimeStamp=true
a1.sinks.k1.hdfs.path=hdfs://master:9000/flume-dir/
a1.sinks.k1.hdfs.filePrefix=log
a1.sinks.k1.hdfs.fileType=DataStream
a1.sinks.k1.hdfs.writeFormat=TEXT
# Roll a new HDFS file every 10 s; disable count-based and size-based
# rolling (0 = off). NOTE: rollSize previously had no value, so Flume's
# default of 1024 bytes applied and files rolled every 1 KB.
a1.sinks.k1.hdfs.rollInterval=10
a1.sinks.k1.hdfs.rollCount=0
a1.sinks.k1.hdfs.rollSize=0
# Channel: buffer events in memory between source and sink.
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Wire the source and the sink to the channel.
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
执行启动flume agent a1
# flume-ng agent -c conf -n a1 -f /opt/apps/flume/conf/flume-conf-hdfs.properties -Dflume.root.logger=INFO,console
操作相应的文件,生成采集的日志
在文件夹/home/flume/test下执行如下命令:
# echo 'this is lys flume test'>flume.txt
19/04/28 06:16:36 INFO hdfs.HDFSEventSink: Writer callback called.
代表日志导入hdfs成功,并且在hdfs下可以看到文件