Flume Practice

Prerequisites: 1. Install telnet first

rpm -qa telnet-server

rpm -qa xinetd

yum list |grep telnet

yum install telnet-server.x86_64

yum install telnet.x86_64

yum list |grep xinetd

yum install xinetd.x86_64

systemctl enable xinetd.service

systemctl enable telnet.socket

systemctl start telnet.socket

systemctl start xinetd
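
Before moving on, a quick sanity check that the telnet service answers (this assumes the default telnet port 23 and a local account to log in with):

systemctl status telnet.socket
telnet localhost 23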

2. Download the package

wget http://mirror.bit.edu.cn/apache/flume/1.6.0/apache-flume-1.6.0-bin.tar.gz

tar zxvf apache-flume-1.6.0-bin.tar.gz
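
flume-ng needs to find a JDK (via JAVA_HOME or java on the PATH). A minimal environment sketch — the JAVA_HOME value below is an assumption, adjust it to your installation:

cd apache-flume-1.6.0-bin
cp conf/flume-env.sh.template conf/flume-env.sh
# point Flume at the local JDK (example path)
echo 'export JAVA_HOME=/usr/java/default' >> conf/flume-env.sh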

I. Standalone

1. NetCat

vim conf/flume-netcat.conf

# Name the components on this agent
agent.sources = r1
agent.sinks = k1
agent.channels = c1

# Describe/configure the source
agent.sources.r1.type = netcat
agent.sources.r1.bind = 127.0.0.1
agent.sources.r1.port = 44444

# Describe the sink
agent.sinks.k1.type = logger

# Use a channel which buffers events in memory
agent.channels.c1.type = memory
agent.channels.c1.capacity = 1000
agent.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
agent.sources.r1.channels = c1
agent.sinks.k1.channel = c1 

# Verify

# Server:
bin/flume-ng agent --conf conf --conf-file conf/flume-netcat.conf --name agent -Dflume.root.logger=INFO,console

# Client (run on the same host, since the source binds 127.0.0.1):
telnet 127.0.0.1 44444
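
If telnet is not installed on the client host, nc sends a one-off event just as well (assuming a netcat package is available); the logger sink then prints each received event on the server console:

echo 'hello flume' | nc 127.0.0.1 44444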

2. Exec

vim conf/flume-exec.conf

# Name the components on this agent
agent.sources = r1
agent.sinks = k1
agent.channels = c1

# Describe/configure the source
agent.sources.r1.type = exec
agent.sources.r1.command = tail -f /data/hadoop/flume/test.txt

# Describe the sink
agent.sinks.k1.type = logger

# Use a channel which buffers events in memory
agent.channels.c1.type = memory
agent.channels.c1.capacity = 1000
agent.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
agent.sources.r1.channels = c1
agent.sinks.k1.channel = c1 

# Verify

# Server:
bin/flume-ng agent --conf conf --conf-file conf/flume-exec.conf --name agent -Dflume.root.logger=INFO,console

# Client (append a timestamped line every second):
while true; do echo $(date) >> /data/hadoop/flume/test.txt; sleep 1; done
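
The exec source simply runs tail, so the file must exist before the agent starts; note also that tail -f stops following a file that is rotated or recreated, whereas tail -F re-opens it:

mkdir -p /data/hadoop/flume
touch /data/hadoop/flume/test.txt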

3. Avro

vim conf/flume-avro.conf

# Define a memory channel called c1 on agent
agent.channels.c1.type = memory

# Define an avro source called r1 on agent and tell it to use channel c1
agent.sources.r1.channels = c1
agent.sources.r1.type = avro
agent.sources.r1.bind = 127.0.0.1
agent.sources.r1.port = 44444

# Describe the sink
agent.sinks.k1.type = hdfs
agent.sinks.k1.channel = c1
agent.sinks.k1.hdfs.path = hdfs://master:9000/flume_data_pool
agent.sinks.k1.hdfs.filePrefix = events-
agent.sinks.k1.hdfs.fileType = DataStream
agent.sinks.k1.hdfs.writeFormat = Text
agent.sinks.k1.hdfs.rollSize = 0
agent.sinks.k1.hdfs.rollCount= 600000
agent.sinks.k1.hdfs.rollInterval = 600

agent.channels = c1
agent.sources = r1
agent.sinks = k1

# Verify

# Server:
bin/flume-ng agent --conf conf --conf-file conf/flume-avro.conf --name agent -Dflume.root.logger=DEBUG,console

# Client: the avro source speaks the Avro RPC protocol, so use the bundled avro-client instead of telnet:
bin/flume-ng avro-client --conf conf --host 127.0.0.1 --port 44444 --filename /tmp/avro_test.txt
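
Create the payload file referenced by the avro-client command first, then confirm the events landed in HDFS; with rollInterval = 600 the output file keeps a .tmp suffix until it rolls:

echo 'hello avro' > /tmp/avro_test.txt
hdfs dfs -ls hdfs://master:9000/flume_data_pool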

4. Exec source with HDFS sink

# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
## exec means Flume runs the given command and takes its data from the command's output
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /3_flume/text.txt
a1.sources.r1.channels = c1

# Describe the sink
## Sink to HDFS; the sink type determines which parameters below apply
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
## Where the HDFS sink writes its files; the path below is not fixed — the escape sequences make the output directory change dynamically with time
a1.sinks.k1.hdfs.path = /flume/tailout/%y-%m-%d/%H%M/
## Prefix for the generated file names
a1.sinks.k1.hdfs.filePrefix = events-
## Whether to round the event timestamp down when substituting it into the path; true means yes
a1.sinks.k1.hdfs.round = true
## Round to every 1 unit, i.e. switch directories once a minute (together with roundUnit below)
a1.sinks.k1.hdfs.roundValue = 1
## The rounding unit is minutes
a1.sinks.k1.hdfs.roundUnit = minute
## Roll to a new file after 3 seconds
a1.sinks.k1.hdfs.rollInterval = 3
## Roll once the current file exceeds 20 bytes
a1.sinks.k1.hdfs.rollSize = 20
## Roll after 5 events have been written
a1.sinks.k1.hdfs.rollCount = 5
## Number of events to batch before appending to HDFS
a1.sinks.k1.hdfs.batchSize = 10
## Use the local time for the escape sequences (no timestamp header needed)
a1.sinks.k1.hdfs.useLocalTimeStamp = true
# Generated file type; the default is SequenceFile, DataStream writes plain text
a1.sinks.k1.hdfs.fileType = DataStream

# Use a channel which buffers events in memory
## Buffer events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

Verify: echo '123456' >> /3_flume/text.txt
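
Whichever of rollInterval, rollSize, or rollCount fires first triggers the roll. To run this agent, a sketch assuming the config above was saved as conf/flume-hdfs.conf (the file name is an assumption — the original doesn't give one), followed by listing the time-partitioned output:

bin/flume-ng agent --conf conf --conf-file conf/flume-hdfs.conf --name a1 -Dflume.root.logger=INFO,console

hdfs dfs -ls /flume/tailout/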

II. Cluster

1. Failover

master:

# agent1 name
agent1.channels = c1
agent1.sources = r1
agent1.sinks = k1 k2

#set group
agent1.sinkgroups = g1

#set channel
agent1.channels.c1.type = memory
agent1.channels.c1.capacity = 1000
agent1.channels.c1.transactionCapacity = 100

agent1.sources.r1.channels = c1
agent1.sources.r1.type = exec
agent1.sources.r1.command = tail -F /3_flume/text.txt

# set sink1
agent1.sinks.k1.channel = c1
agent1.sinks.k1.type = avro
agent1.sinks.k1.hostname = slave1
agent1.sinks.k1.port = 52020

# set sink2
agent1.sinks.k2.channel = c1
agent1.sinks.k2.type = avro
agent1.sinks.k2.hostname = slave2
agent1.sinks.k2.port = 52020

# set sink group
agent1.sinkgroups.g1.sinks = k1 k2

# set failover; the sink with the higher priority is preferred (k1 → slave1)
agent1.sinkgroups.g1.processor.type = failover
agent1.sinkgroups.g1.processor.priority.k1 = 10
agent1.sinkgroups.g1.processor.priority.k2 = 1
agent1.sinkgroups.g1.processor.maxpenalty = 10000

slave1:

# agent1 name
a1.channels = c1
a1.sources = r1
a1.sinks = k1

#set channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# avro source: receive events forwarded from the master agent
a1.sources.r1.type = avro
a1.sources.r1.bind = slave1
a1.sources.r1.port = 52020

# set sink to hdfs
a1.sinks.k1.type = logger
# a1.sinks.k1.type = hdfs
# a1.sinks.k1.hdfs.path=/flume_data_pool
# a1.sinks.k1.hdfs.fileType=DataStream
# a1.sinks.k1.hdfs.writeFormat=TEXT
# a1.sinks.k1.hdfs.rollInterval=1
# a1.sinks.k1.hdfs.filePrefix = %Y-%m-%d

a1.sources.r1.channels = c1
a1.sinks.k1.channel=c1

slave2:

# agent1 name
a1.channels = c1
a1.sources = r1
a1.sinks = k1

#set channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# avro source: receive events forwarded from the master agent
a1.sources.r1.type = avro
a1.sources.r1.bind = slave2
a1.sources.r1.port = 52020

# set sink to hdfs
a1.sinks.k1.type = logger
# a1.sinks.k1.type = hdfs
# a1.sinks.k1.hdfs.path=/flume_data_pool
# a1.sinks.k1.hdfs.fileType=DataStream
# a1.sinks.k1.hdfs.writeFormat=TEXT
# a1.sinks.k1.hdfs.rollInterval=1
# a1.sinks.k1.hdfs.filePrefix = %Y-%m-%d

a1.sources.r1.channels = c1
a1.sinks.k1.channel=c1
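
To exercise the failover, start the two collectors before the producer (the config file names below are assumptions — the original doesn't name them), then kill the agent on slave1 and watch events shift to slave2:

# on slave1 and on slave2, each with its own config from above
bin/flume-ng agent --conf conf --conf-file conf/flume-collector.conf --name a1 -Dflume.root.logger=INFO,console

# on master
bin/flume-ng agent --conf conf --conf-file conf/flume-master.conf --name agent1 -Dflume.root.logger=INFO,console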

2. Load balancing

Replace the following block in the failover master config:

# set failover
agent1.sinkgroups.g1.processor.type = failover
agent1.sinkgroups.g1.processor.priority.k1 = 10
agent1.sinkgroups.g1.processor.priority.k2 = 1
agent1.sinkgroups.g1.processor.maxpenalty = 10000

with:

agent1.sinkgroups = g1
agent1.sinkgroups.g1.sinks = k1 k2
agent1.sinkgroups.g1.processor.type = load_balance
agent1.sinkgroups.g1.processor.selector = round_robin

Test: for i in $(seq 1 100); do echo '==============' $i >> /3_flume/text.txt; done

3. Interception and filtering

(Timestamp interceptor)

a1.sources = r1
a1.sinks = k1
a1.channels = c1

a1.sources.r1.type = http
a1.sources.r1.host = master
a1.sources.r1.port = 52020
a1.sources.r1.channels = c1

a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.preserveExisting = false
a1.sources.r1.interceptors.i1.type = timestamp

a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path = hdfs://master:9000/flume/%Y-%m-%d/%H%M
a1.sinks.k1.hdfs.filePrefix = badou.
a1.sinks.k1.hdfs.fileType = DataStream

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

Test:

curl -X POST -d '[{"headers":{"flume":"nihao"},"body":"hello"}]' http://master:52020
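
If the interceptor stamped the event, the body ends up under a date-named directory; a quick check assuming the HDFS layout configured above:

hdfs dfs -ls hdfs://master:9000/flume/$(date +%Y-%m-%d)/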

Reposted from: https://my.oschina.net/u/3746234/blog/3018674
