A Beginner's Data Warehouse Diary, Day 2 — Flume Examples (Part 2)

  6.

spool + mem + logger
spool: the source is a spooling directory; as soon as a file lands in that directory it is ingested, so as long as files keep arriving, all of their data keeps flowing through the agent.
mem: the data is moved through a memory channel.
logger: the data is written to the agent's log.

[root@sh01 ~]# vi /root/flumeconf/spool-logger.conf
a1.sources = r1  
a1.channels = c1
a1.sinks = s1

# spooldir source: watch /home/flume/spool and ingest every file dropped into it
a1.sources.r1.type=spooldir
a1.sources.r1.spoolDir = /home/flume/spool
# rename fully ingested files with this suffix instead of deleting them
a1.sources.r1.fileSuffix = .COMPLETED
a1.sources.r1.deletePolicy=never
# do not add headers carrying the source file's absolute path or basename
a1.sources.r1.fileHeader=false
a1.sources.r1.fileHeaderKey=file
a1.sources.r1.basenameHeader=false
a1.sources.r1.basenameHeaderKey=basename
# events handed to the channel per batch
a1.sources.r1.batchSize=100
a1.sources.r1.inputCharset=UTF-8
a1.sources.r1.bufferMaxLines=1000

# memory channel: buffer events in RAM
a1.channels.c1.type=memory
# at most 1000 events in the channel, 100 per transaction
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
# seconds a put/take waits when the channel is full/empty
a1.channels.c1.keep-alive=3
# byteCapacity limits the memory used by event bodies; the 20% buffer leaves headroom for headers
a1.channels.c1.byteCapacityBufferPercentage=20
a1.channels.c1.byteCapacity=800000

# logger sink: print events to the agent's log, showing at most 16 bytes of each body
a1.sinks.s1.type=logger
a1.sinks.s1.maxBytesToLog=16

a1.sources.r1.channels=c1
a1.sinks.s1.channel=c1
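
The spool directory has to exist before the agent is started (the spooldir source will not start against a missing directory), so create it first if it is not there yet:

[root@sh01 ~]# mkdir -p /home/flume/spool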

Start the agent

[root@sh01 ~]# flume-ng agent -c /usr/local/flume1.8/conf -f /root/flumeconf/spool-logger.conf -n a1 -Dflume.root.logger=INFO,console

Test data

[root@sh01 ~]# for i in `seq 1 10` ; do echo $i >> /home/flume/spool/$i;done
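
Each file should be renamed with the .COMPLETED suffix configured above once Flume has ingested it, and its contents should appear in the logger output; listing the spool directory is a quick check:

[root@sh01 ~]# ls /home/flume/spool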

  7.

http + mem + logger
http: the data arrives over the HTTP protocol, normally as GET or POST requests. Each HTTP request is converted into Flume Events by a pluggable handler.
mem: the data is moved through a memory channel.
logger: the data is written to the agent's log.

[root@sh01 ~]# vi /root/flumeconf/http-logger.conf
a1.sources = r1  
a1.channels = c1
a1.sinks = s1

# http source: listen on hadoop01:6666 and let the JSON handler turn each request body into events
a1.sources.r1.type=http
a1.sources.r1.bind = hadoop01
a1.sources.r1.port = 6666
a1.sources.r1.handler = org.apache.flume.source.http.JSONHandler
a1.sources.r1.handler.nickname = random props

a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
a1.channels.c1.keep-alive=3
a1.channels.c1.byteCapacityBufferPercentage=20
a1.channels.c1.byteCapacity=800000

a1.sinks.s1.type=logger
a1.sinks.s1.maxBytesToLog=16

a1.sources.r1.channels=c1
a1.sinks.s1.channel=c1

Start the agent

[root@sh01 ~]# flume-ng agent -c /usr/local/flume1.8/conf -f /root/flumeconf/http-logger.conf -n a1 -Dflume.root.logger=INFO,console

Test data

[root@sh01 ~]# curl -X POST -d '[{"headers":{"name":"zhangsan","pwd":"123456"},"body":"this is my content"}]' http://hadoop01:6666
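
Since the JSONHandler expects a JSON array, several events can be posted in a single request; a small variation on the command above (the header values are made up for illustration):

[root@sh01 ~]# curl -X POST -d '[{"headers":{"host":"sh01"},"body":"event one"},{"headers":{"host":"sh01"},"body":"event two"}]' http://hadoop01:6666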

  8.

syslogtcp + mem + logger
syslogtcp: syslog is widely used for system logging. Syslog messages can be written to local files or sent over the network to a syslog server, which can store the messages from many devices in one place or parse their contents for further processing. This source receives syslog data over a TCP port.
mem: the data is moved through a memory channel.
logger: the data is written to the agent's log.

[root@sh01 ~]# vi /root/flumeconf/syslogtcp-logger.conf
a1.sources = r1
a1.channels = c1
a1.sinks = s1

# syslogtcp source: listen for syslog messages over TCP on hadoop01:6666
a1.sources.r1.type=syslogtcp
a1.sources.r1.host = hadoop01
a1.sources.r1.port = 6666

a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
a1.channels.c1.keep-alive=3
a1.channels.c1.byteCapacityBufferPercentage=20
a1.channels.c1.byteCapacity=800000

a1.sinks.s1.type=logger
a1.sinks.s1.maxBytesToLog=16

a1.sources.r1.channels=c1
a1.sinks.s1.channel=c1

Start the agent

[root@sh01 ~]# flume-ng agent -c /usr/local/flume1.8/conf -f /root/flumeconf/syslogtcp-logger.conf -n a1 -Dflume.root.logger=INFO,console

Test data

nc needs to be installed first:
[root@sh01 ~]# yum -y install nmap-ncat
[root@sh01 ~]# echo "hello world" | nc hadoop01 6666
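
A bare string like this has no syslog header, so the source may flag it as invalid syslog data (it is still delivered as an event). A message in the usual RFC 3164 layout, priority plus timestamp plus hostname, gets its facility and severity parsed into headers; the values below are only an example:

[root@sh01 ~]# echo "<13>Feb  5 17:32:18 sh01 app: hello from syslog" | nc hadoop01 6666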

  9.

syslogtcp + mem + hdfs
syslogtcp: same as above.
mem: the data is moved through a memory channel.
hdfs: the destination is HDFS.

[root@sh01 ~]# vi /root/flumeconf/syslogtcp-hdfs.conf
a1.sources = r1  
a1.channels = c1
a1.sinks = s1

a1.sources.r1.type=syslogtcp
a1.sources.r1.host = hadoop01
a1.sources.r1.port = 6666

a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
a1.channels.c1.keep-alive=3
a1.channels.c1.byteCapacityBufferPercentage=20
a1.channels.c1.byteCapacity=800000

# HDFS sink: write events under a time-bucketed path
a1.sinks.s1.type=hdfs
a1.sinks.s1.hdfs.path=hdfs://hadoop01:8020/flume/%Y/%m/%d/%H%M
a1.sinks.s1.hdfs.filePrefix=flume-hdfs
a1.sinks.s1.hdfs.fileSuffix=.log
# the file currently being written carries the .tmp suffix until it is rolled
a1.sinks.s1.hdfs.inUseSuffix=.tmp
# roll the file every 60 s, or at 1024 bytes, or after 10 events, whichever comes first
a1.sinks.s1.hdfs.rollInterval=60
a1.sinks.s1.hdfs.rollSize=1024
a1.sinks.s1.hdfs.rollCount=10
# 0 = never close a file just because it is idle
a1.sinks.s1.hdfs.idleTimeout=0
a1.sinks.s1.hdfs.batchSize=100
# write plain text instead of a SequenceFile
a1.sinks.s1.hdfs.fileType=DataStream
a1.sinks.s1.hdfs.writeFormat=Text
# round the timestamp used for bucketing down to 1-second granularity
a1.sinks.s1.hdfs.round=true
a1.sinks.s1.hdfs.roundValue=1
a1.sinks.s1.hdfs.roundUnit=second
# use the agent's local time instead of requiring a timestamp header on each event
a1.sinks.s1.hdfs.useLocalTimeStamp=true

a1.sources.r1.channels=c1
a1.sinks.s1.channel=c1
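
As a worked example of the path escapes: with useLocalTimeStamp=true, an event received at, say, 10:23 on 2021-03-05 (an example time, not from a real run) would be written under a directory like

hdfs://hadoop01:8020/flume/2021/03/05/1023/

in a file named with the flume-hdfs prefix and .log suffix, carrying the extra .tmp suffix until it is rolled.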

Start the agent

[root@sh01 ~]# flume-ng agent -c /usr/local/flume1.8/conf -f /root/flumeconf/syslogtcp-hdfs.conf -n a1 -Dflume.root.logger=INFO,console

Test data

[root@sh01 ~]# echo "hello world hello " | nc hadoop01 6666
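
To confirm that the events actually landed, list the sink's target directory (the path comes from hdfs.path above; the file being written keeps the .tmp suffix until it is rolled, here after at most 60 seconds):

[root@sh01 ~]# hdfs dfs -ls -R /flume
[root@sh01 ~]# hdfs dfs -cat /flume/*/*/*/*/flume-hdfs*.log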

 10.

syslogtcp + file + hdfs
syslogtcp: same as above.
file: a file channel is used as the transport, staging the events on local disk.
hdfs: the destination is HDFS.

[root@sh01 ~]# vi /root/flumeconf/syslogtcp-fh.conf
a1.sources = r1  
a1.channels = c1
a1.sinks = s1

a1.sources.r1.type=syslogtcp
a1.sources.r1.host = hadoop01
a1.sources.r1.port = 6666

# file channel: stage events on local disk so they survive an agent restart
a1.channels.c1.type=file
# where event data and checkpoints are persisted
a1.channels.c1.dataDirs=/home/flume/filechannel/data
a1.channels.c1.checkpointDir=/home/flume/filechannel/point
a1.channels.c1.transactionCapacity=10000
# checkpoint every 30 seconds (value in milliseconds)
a1.channels.c1.checkpointInterval=30000
a1.channels.c1.capacity=1000000
a1.channels.c1.keep-alive=3

a1.sinks.s1.type=hdfs
a1.sinks.s1.hdfs.path=hdfs://hadoop01:8020/flume/%Y/%m/%d/%H%M
a1.sinks.s1.hdfs.filePrefix=flume-hdfs
a1.sinks.s1.hdfs.fileSuffix=.log
a1.sinks.s1.hdfs.inUseSuffix=.tmp
a1.sinks.s1.hdfs.rollInterval=60
a1.sinks.s1.hdfs.rollSize=1024
a1.sinks.s1.hdfs.rollCount=10
a1.sinks.s1.hdfs.idleTimeout=0
a1.sinks.s1.hdfs.batchSize=100
a1.sinks.s1.hdfs.fileType=DataStream
a1.sinks.s1.hdfs.writeFormat=Text
a1.sinks.s1.hdfs.round=true
a1.sinks.s1.hdfs.roundValue=1
a1.sinks.s1.hdfs.roundUnit=second
a1.sinks.s1.hdfs.useLocalTimeStamp=true

a1.sources.r1.channels=c1
a1.sinks.s1.channel=c1

Start the agent

[root@sh01 ~]# flume-ng agent -c /usr/local/flume1.8/conf -f /root/flumeconf/syslogtcp-fh.conf -n a1 -Dflume.root.logger=INFO,console

Test data

[root@sh01 ~]# echo "hello world hello " | nc hadoop01 6666
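
A quick way to see the file channel doing its job is to look at its data and checkpoint directories on local disk, then check HDFS as in the previous example (all paths taken from the config above):

[root@sh01 ~]# ls /home/flume/filechannel/data /home/flume/filechannel/point
[root@sh01 ~]# hdfs dfs -ls -R /flume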