Flume 从入门到实时日志采集实例

最新推荐文章于 2024-07-23 08:03:31 发布

doegoo

最新推荐文章于 2024-07-23 08:03:31 发布

阅读量7k

点赞数

分类专栏： flume cloudera 文章标签：实例 flume

本文链接：https://blog.csdn.net/doegoo/article/details/49449465

版权

cloudera 同时被 2 个专栏收录

9 篇文章 0 订阅

订阅专栏

flume

1 篇文章 0 订阅

订阅专栏

#flume-ng 初始配置
bin/flume-ng agent --conf conf --conf-file example.conf --name a1 -Dflume.root.logger=INFO,console
#agent 启动的进程为agent进程
#conf 指定flume-env.sh和log4j.properties的路径
#conf-file 指定agent的配置文件
#name 为agent的名称，与配置文件中的agent名称相同

#flume-ng 用zookeeper管理配置
bin/flume-ng agent --conf conf -z 10.238.18.80:2181,10.238.18.80:2181 -p /flume --name a1 -Dflume.root.logger=INFO,console
#-p 为zookeeper下flume的路径
#--name 为zookeeper下配置数据所在的节点
#-z 为zookeeper集群的访问地址，多台主机用逗号隔开

#开启一个avro客户端，把数据传到指定collector的avro source中
bin/flume-ng avro-client -H localhost -p 4141 -F /opt/cloudera/parcels/CDH/lib/zookeeper/LICENSE.txt

#使用avro的source去接收其它agent传送过来的avro格式的数据（形成agent->collector的模式）
# Collector agent "a1": Avro source r1 -> memory channel c1 -> logger sink k1.
# NOTE: this file is in Java properties format, so a comment must be on its own
# line. A "#" placed after a value becomes part of that value (it would break
# the integer port and the boolean ipFilter), so no trailing comments are used.
# Describe the agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = avro
# Address to listen on. Replace the placeholder with this machine's hostname or
# its own IP address (not 127.0.0.1); otherwise agents on other hosts get a
# "connection refused" error when they try to send data here.
a1.sources.r1.bind = hostname/ip
# Port to listen on
a1.sources.r1.port = 41418
# Enable IP filtering of incoming connections (this is not an interceptor)
a1.sources.r1.ipFilter = true
# IP filter rules, comma separated. Rule syntax:
#   <'allow' or 'deny'>:<'ip' or 'name' for computer name>:<pattern>
# example: ipFilterRules=allow:ip:127.*,allow:name:localhost,deny:ip:*
# NOTE(review): the pattern after the final "allow:ip:" is empty here; fill in
# the IP pattern to allow (e.g. a subnet such as 10.238.*) before use.
a1.sources.r1.ipFilterRules = deny:name:localhost,allow:ip:
# Describe the sink: write received events to the agent's log
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

# 使用 Exec Source 实时日志采集实例
# Log-shipping agent "a1": Exec source r1 -> memory channel c1 -> Avro sink k1.
# Components of the agent
a1.channels = c1
a1.sources = r1
a1.sinks = k1
# Channel: buffer events in memory
a1.channels.c1.type = memory
a1.channels.c1.transactionCapacity = 100
a1.channels.c1.capacity = 1000
# Source: run "tail -F" so that lines newly appended to the log file are
# pushed into the agent's channel as they are written
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /root/test.txt
a1.sources.r1.channels = c1
# Sink: forward events over Avro to the collector listening on this host/port
a1.sinks.k1.type = avro
a1.sinks.k1.port = 45454
a1.sinks.k1.hostname = InterFinance02
a1.sinks.k1.channel = c1
# 启动实时采集日志的agent
bin/flume-ng agent --conf /opt/cloudera/parcels/CDH/lib/flume-ng/conf --conf-file execavro.conf --name a1

# 使用Spooling Directory Source采集日志(官方建议用Spooling Directory Source 代替 Exec Source)
# Log-shipping agent "a1": Spooling Directory source s1 -> memory channel c1
# -> Avro sink k1.
# Describe the agent
a1.sources = s1
a1.channels = c1
a1.sinks = k1
# Describe the sources
a1.sources.s1.type = spooldir
# Directory watched by the Spooling Directory source. When a new file appears
# in it, the file is read into the channel and then renamed with the
# .COMPLETED suffix.
a1.sources.s1.spoolDir = /root/test/flume
a1.sources.s1.fileHeader = true
# Describe the sink: forward events over Avro to the collector
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = InterFinance02
a1.sinks.k1.port = 45454
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel.
# The source declared above is s1, so the binding must use s1 (not r1);
# otherwise s1 has no channel and the agent cannot start the source.
a1.sources.s1.channels = c1
a1.sinks.k1.channel = c1