6、
spool: Source来源于目录,有文件进入目录就摄取,Memory Channel将它暂存到内存,最终输出目的地是Logger(控制台)
即只要某个目录不断有新文件进入,控制台日志中也会同步打印出所有数据。
mem:通过内存来传输数据
logger:是传送数据到日志
[root@sh01 ~]# vi spool-logger.conf
a1.sources = r1
a1.channels = c1
a1.sinks = s1
a1.sources.r1.type=spooldir
a1.sources.r1.spoolDir = /home/flume/spool
a1.sources.r1.fileSuffix = .COMPLETED
a1.sources.r1.deletePolicy=never
a1.sources.r1.fileHeader=false
a1.sources.r1.fileHeaderKey=file
a1.sources.r1.basenameHeader=false
a1.sources.r1.basenameHeaderKey=basename
a1.sources.r1.batchSize=100
a1.sources.r1.inputCharset=UTF-8
a1.sources.r1.bufferMaxLines=1000
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
a1.channels.c1.keep-alive=3
a1.channels.c1.byteCapacityBufferPercentage=20
a1.channels.c1.byteCapacity=800000
a1.sinks.s1.type=logger
a1.sinks.s1.maxBytesToLog=16
a1.sources.r1.channels=c1
a1.sinks.s1.channel=c1
启动 agent
[root@sh01 ~]# flume-ng agent -c /usr/local/flume1.8/conf -f /root/flumeconf/spool-logger.conf -n a1 -Dflume.root.logger=INFO,console
测试数据
[root@sh01 ~]# for i in `seq 1 10` ; do echo $i >> /home/flume/spool/$i;done
7、
http+ mem+logger
http: 表示数据来源是http网络协议,一般接收的请求为get或post请求. 所有的http请求会通过可插拔的Handler插件转化为flume的Event数据.
mem:表示用内存传输通道
logger:表示输出格式为Logger格式
[root@sh01 ~]# vi http-logger.conf
a1.sources = r1
a1.channels = c1
a1.sinks = s1
a1.sources.r1.type=http
a1.sources.r1.bind = hadoop01
a1.sources.r1.port = 6666
a1.sources.r1.handler = org.apache.flume.source.http.JSONHandler
a1.sources.r1.handler.nickname = random props
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
a1.channels.c1.keep-alive=3
a1.channels.c1.byteCapacityBufferPercentage=20
a1.channels.c1.byteCapacity=800000
a1.sinks.s1.type=logger
a1.sinks.s1.maxBytesToLog=16
a1.sources.r1.channels=c1
a1.sinks.s1.channel=c1
启动 agent
[root@sh01 ~]# flume-ng agent -c /usr/local/flume1.8/conf -f /root/flumeconf/http-logger.conf -n a1 -Dflume.root.logger=INFO,console
测试数据
[root@sh01 ~]# curl -X POST -d '[{"headers":{"name":"zhangsan","pwd":"123456"},"body":"this is my content"}]' http://hadoop01:6666
8、
Syslog tcp+mem+logger
syslog tcp: syslog广泛应用于系统日志。syslog日志消息既可以记录在本地文件中,也可以通过网络发送到接收syslog的服务器。接收syslog的服务器可以对多个设备的syslog消息进行统一的存储,或者解析其中的内容做相应的处理。本数据源指的是syslog通过tcp端口来传送数据
mem:通过内存通道来传输数据
logger:输出目的地为logger
[root@sh01 ~]# vi syslogtcp-logger.conf
a1.sources = r1
a1.channels = c1
a1.sinks = s1
a1.sources.r1.type=syslogtcp
a1.sources.r1.host = hadoop01
a1.sources.r1.port = 6666
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
a1.channels.c1.keep-alive=3
a1.channels.c1.byteCapacityBufferPercentage=20
a1.channels.c1.byteCapacity=800000
a1.sinks.s1.type=logger
a1.sinks.s1.maxBytesToLog=16
a1.sources.r1.channels=c1
a1.sinks.s1.channel=c1
启动 agent
[root@sh01 ~]# flume-ng agent -c /usr/local/flume1.8/conf -f /root/flumeconf/syslogtcp-logger.conf -n a1 -Dflume.root.logger=INFO,console
测试数据
需要先安装nc
[root@sh01 ~]# yum -y install nmap-ncat
[root@sh01 ~]# echo "hello world" | nc hadoop01 6666
9、
Syslogtcp+mem+hdfs
syslogtcp:同上
mem:表示从内存传送
hdfs:表示目的地为HDFS服务器
[root@sh01 ~]# vi syslogtcp-hdfs.conf
a1.sources = r1
a1.channels = c1
a1.sinks = s1
a1.sources.r1.type=syslogtcp
a1.sources.r1.host = hadoop01
a1.sources.r1.port = 6666
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
a1.channels.c1.keep-alive=3
a1.channels.c1.byteCapacityBufferPercentage=20
a1.channels.c1.byteCapacity=800000
a1.sinks.s1.type=hdfs
a1.sinks.s1.hdfs.path=hdfs://hadoop01:8020/flume/%Y/%m/%d/%H%M
a1.sinks.s1.hdfs.filePrefix=flume-hdfs
a1.sinks.s1.hdfs.fileSuffix=.log
a1.sinks.s1.hdfs.inUseSuffix=.tmp
a1.sinks.s1.hdfs.rollInterval=60
a1.sinks.s1.hdfs.rollSize=1024
a1.sinks.s1.hdfs.rollCount=10
a1.sinks.s1.hdfs.idleTimeout=0
a1.sinks.s1.hdfs.batchSize=100
a1.sinks.s1.hdfs.fileType=DataStream
a1.sinks.s1.hdfs.writeFormat=Text
a1.sinks.s1.hdfs.round=true
a1.sinks.s1.hdfs.roundValue=1
a1.sinks.s1.hdfs.roundUnit=second
a1.sinks.s1.hdfs.useLocalTimeStamp=true
a1.sources.r1.channels=c1
a1.sinks.s1.channel=c1
启动 agent
[root@sh01 ~]# flume-ng agent -c /usr/local/flume1.8/conf -f /root/flumeconf/syslogtcp-hdfs.conf -n a1 -Dflume.root.logger=INFO,console
测试数据
[root@sh01 ~]# echo "hello world hello " | nc hadoop01 6666
10、
Syslogtcp+file+hdfs
syslogtcp 同上
file:表示用file作为channel传送介质
hdfs:表示目的地为HDFS
[root@sh01 ~]# vi syslogtcp-fh.conf
a1.sources = r1
a1.channels = c1
a1.sinks = s1
a1.sources.r1.type=syslogtcp
a1.sources.r1.host = hadoop01
a1.sources.r1.port = 6666
a1.channels.c1.type=file
a1.channels.c1.dataDirs=/home/flume/filechannel/data
a1.channels.c1.checkpointDir=/home/flume/filechannel/point
a1.channels.c1.transactionCapacity=10000
a1.channels.c1.checkpointInterval=30000
a1.channels.c1.capacity=1000000
a1.channels.c1.keep-alive=3
a1.sinks.s1.type=hdfs
a1.sinks.s1.hdfs.path=hdfs://hadoop01:8020/flume/%Y/%m/%d/%H%M
a1.sinks.s1.hdfs.filePrefix=flume-hdfs
a1.sinks.s1.hdfs.fileSuffix=.log
a1.sinks.s1.hdfs.inUseSuffix=.tmp
a1.sinks.s1.hdfs.rollInterval=60
a1.sinks.s1.hdfs.rollSize=1024
a1.sinks.s1.hdfs.rollCount=10
a1.sinks.s1.hdfs.idleTimeout=0
a1.sinks.s1.hdfs.batchSize=100
a1.sinks.s1.hdfs.fileType=DataStream
a1.sinks.s1.hdfs.writeFormat=Text
a1.sinks.s1.hdfs.round=true
a1.sinks.s1.hdfs.roundValue=1
a1.sinks.s1.hdfs.roundUnit=second
a1.sinks.s1.hdfs.useLocalTimeStamp=true
a1.sources.r1.channels=c1
a1.sinks.s1.channel=c1
启动 agent
[root@sh01 ~]# flume-ng agent -c /usr/local/flume1.8/conf -f /root/flumeconf/syslogtcp-fh.conf -n a1 -Dflume.root.logger=INFO,console
测试数据
[root@sh01 ~]# echo "hello world hello " | nc hadoop01 6666