Flume Basics [Transactions, Agent Internals, Topologies, Example Configuration Files]

一 Flume Transactions

What a transaction does: it binds multiple operations together so that they either all succeed or all fail.

What Flume transactions do: guarantee that data is not lost in transit. Flume uses a Put transaction between the Source and the Channel, and a Take transaction between the Channel and the Sink.
(Figure: Flume transaction flow — Put transaction between Source and Channel, Take transaction between Channel and Sink)
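Transactions are configured per channel. As a minimal sketch (the property names below are those of the standard memory channel; the numbers are only illustrative), the size of a single Put/Take transaction is bounded by transactionCapacity:

a1.channels.c1.type = memory
# Maximum number of events the channel can hold
a1.channels.c1.capacity = 1000
# Maximum number of events per Put/Take transaction (must not exceed capacity)
a1.channels.c1.transactionCapacity = 100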

二 Flume Agent Internals

(Figure: Flume Agent internal architecture — Source, Channel Processor, Channel Selector, Channel, Sink Processor, Sink)

三 Flume Topologies

1 Simple chain

(Figure: simple chain topology)

2 Replicating and multiplexing

(Figure: replicating and multiplexing topology)

3 Load balancing and failover

(Figure: load balancing and failover topology)

4 Aggregation

(Figure: aggregation topology)

四 Examples

1 Interceptor

a1.sources = r1
a1.channels = c1
a1.sinks = k1

a1.sources.r1.type = netcat
a1.sources.r1.bind = hadoop101
a1.sources.r1.port = 44444
# Configure an interceptor (adds a timestamp to each event's headers)
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = timestamp

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000


a1.sinks.k1.type = logger


a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
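To try this case, start the agent and then feed it data over netcat. The config file path below is illustrative; only the agent name a1 and the host/port need to match the config above:

bin/flume-ng agent --conf conf --name a1 --conf-file job/interceptor-demo.conf -Dflume.root.logger=INFO,console
# In another shell, send test data to the netcat source
nc hadoop101 44444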

2 Chaining

#agent1 (hadoop102): netcat source --> memory channel --> avro sink
a1.sources = r1
a1.channels = c1
a1.sinks = k1

a1.sources.r1.type = netcat
a1.sources.r1.bind = hadoop102
a1.sources.r1.port = 22222

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000


a1.sinks.k1.type = avro
# hostname is the machine the data is sent to (the downstream avro source)
a1.sinks.k1.hostname = hadoop103
a1.sinks.k1.port = 33333


a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

----------------------------------------------------------
# Note: start the agent on hadoop103 first, because it receives the data sent by hadoop102
#agent2 (hadoop103): avro source --> memory channel --> logger sink
a1.sources = r1
a1.channels = c1
a1.sinks = k1

a1.sources.r1.type = avro
a1.sources.r1.bind = hadoop103
a1.sources.r1.port = 33333

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000

a1.sinks.k1.type = logger

a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

3 Replicating and multiplexing

#agent1(hadoop101)
a1.sources = r1
a1.channels = c1 c2
a1.sinks = k1 k2

a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /opt/module/flume/demo/123.log

# Channel selector - replicating (the default; may be omitted)
#a1.sources.r1.selector.type = replicating

# Multiplexing selector
a1.sources.r1.selector.type = multiplexing
# An event = headers + body; multiplexing routes each event by a key/value in its headers
# 'state' is the header key to inspect
a1.sources.r1.selector.header = state
# Events whose 'state' header equals CZ are sent to c1
a1.sources.r1.selector.mapping.CZ = c1
# Events whose 'state' header equals US are sent to c2
a1.sources.r1.selector.mapping.US = c2

# Requirement: add data to each event's headers
# The static interceptor sets a custom key/value in every event's headers
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = static
# Header key
a1.sources.r1.interceptors.i1.key = state
# Header value
a1.sources.r1.interceptors.i1.value = CZ


a1.channels.c1.type = memory
a1.channels.c2.type = memory


a1.sinks.k1.type = avro
a1.sinks.k1.hostname = hadoop102
a1.sinks.k1.port = 22222
a1.sinks.k2.type = avro
a1.sinks.k2.hostname = hadoop103
a1.sinks.k2.port = 33333

# One source feeds two channels
a1.sources.r1.channels = c1 c2 
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c2

-----------------------------------------------------------------

#agent2(hadoop102)
a1.sources = r1
a1.channels = c1
a1.sinks = k1

a1.sources.r1.type = avro
a1.sources.r1.bind = hadoop102
a1.sources.r1.port = 22222

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000

a1.sinks.k1.type = logger

a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1


-----------------------------------------------------------------

#agent3(hadoop103)
a1.sources = r1
a1.channels = c1
a1.sinks = k1

a1.sources.r1.type = avro
a1.sources.r1.bind = hadoop103
a1.sources.r1.port = 33333

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000

#a1.sinks.k1.type = logger
# file_roll stores event data on the local disk (rarely used in practice)
a1.sinks.k1.type = file_roll
# Directory the events are written to
a1.sinks.k1.sink.directory = /opt/module/flume/demo
# Roll to a new file every 30 seconds
a1.sinks.k1.sink.rollInterval = 30

a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
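Note that the static interceptor above stamps every event with state=CZ, so the multiplexing selector routes all events to c1 and only hadoop102 receives data; hadoop103 would only see events whose 'state' header is US. After starting the three agents (the downstream agents on hadoop102 and hadoop103 first), a quick test is to append to the tailed file:

# On hadoop101: the exec source tails this file, so new lines become events
echo "hello flume" >> /opt/module/flume/demo/123.log
# The line should appear in the logger sink on hadoop102 (header state=CZ -> channel c1)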

4 Load balancing and failover

(1) Failover

#agent1(hadoop101)
a1.sources = r1
a1.channels = c1
a1.sinks = k1 k2

a1.sources.r1.type = netcat
a1.sources.r1.bind = hadoop101
a1.sources.r1.port = 11111

# To have several sinks drain one channel as a group, define a sink group
a1.sinkgroups = g1
# Sinks that belong to this group
a1.sinkgroups.g1.sinks = k1 k2
# Sink processor type: failover (or load_balance for load balancing)
a1.sinkgroups.g1.processor.type = failover
# Sink priorities; the higher the number, the higher the priority
a1.sinkgroups.g1.processor.priority.k1 = 5
a1.sinkgroups.g1.processor.priority.k2 = 10

a1.channels.c1.type = memory

a1.sinks.k1.type = avro
a1.sinks.k1.hostname = hadoop102
a1.sinks.k1.port = 22222

a1.sinks.k2.type = avro
a1.sinks.k2.hostname = hadoop103
a1.sinks.k2.port = 33333

a1.sources.r1.channels = c1 
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c1

-----------------------------
#agent2(hadoop102)
a1.sources = r1
a1.channels = c1
a1.sinks = k1

a1.sources.r1.type = avro
a1.sources.r1.bind = hadoop102
a1.sources.r1.port = 22222

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000

a1.sinks.k1.type = logger

a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

--------------------------------
#agent3(hadoop103)
a1.sources = r1
a1.channels = c1
a1.sinks = k1

a1.sources.r1.type = avro
a1.sources.r1.bind = hadoop103
a1.sources.r1.port = 33333

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000

a1.sinks.k1.type = logger

a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
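Since k2 has the higher priority (10 > 5), all events initially go to the avro sink pointing at hadoop103. A simple way to observe the failover: send data through the netcat source, then stop the agent on hadoop103 and keep sending; the events should start showing up in the hadoop102 logger instead. For example:

# Send test data to agent1 on hadoop101
nc hadoop101 11111
# Now stop the agent on hadoop103 (Ctrl+C) and type more lines in the nc session;
# the failover processor switches delivery to sink k1 (hadoop102)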

(2) Load balancing

#agent1(hadoop101)
a1.sources = r1
a1.channels = c1
a1.sinks = k1 k2

a1.sources.r1.type = netcat
a1.sources.r1.bind = hadoop101
a1.sources.r1.port = 11111

# Define a sink group
a1.sinkgroups = g1
# Sinks that belong to this group
a1.sinkgroups.g1.sinks = k1 k2
# Sink processor type (load balancing)
a1.sinkgroups.g1.processor.type = load_balance
# Selector: random (random assignment) or round_robin (round robin)
a1.sinkgroups.g1.processor.selector = random

a1.channels.c1.type = memory

a1.sinks.k1.type = avro
a1.sinks.k1.hostname = hadoop102
a1.sinks.k1.port = 22222

a1.sinks.k2.type = avro
a1.sinks.k2.hostname = hadoop103
a1.sinks.k2.port = 33333

a1.sources.r1.channels = c1 
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c1

-----------------------------
#agent2(hadoop102)
a1.sources = r1
a1.channels = c1
a1.sinks = k1

a1.sources.r1.type = avro
a1.sources.r1.bind = hadoop102
a1.sources.r1.port = 22222

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000

a1.sinks.k1.type = logger

a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1


--------------------------------
#agent3(hadoop103)
a1.sources = r1
a1.channels = c1
a1.sinks = k1

a1.sources.r1.type = avro
a1.sources.r1.bind = hadoop103
a1.sources.r1.port = 33333

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000

a1.sinks.k1.type = logger

a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
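With the load_balance processor and selector = random, events are spread across both avro sinks, so over time the logger sinks on hadoop102 and hadoop103 should each print part of the data. A quick test:

# Send several lines through the netcat source on hadoop101
nc hadoop101 11111
# Watch the logger output on hadoop102 and hadoop103: events are distributed between them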