Flume Installation

1、tar zxvf apache-flume-1.6.0-bin.tar.gz -C /root/apps/

2、cd /root/apps/apache-flume-1.6.0-bin/conf
    mv flume-env.sh.template flume-env.sh
    vi flume-env.sh
    export JAVA_HOME=/root/apps/jdk1.7.0_80

3、export FLUME_HOME=/root/apps/apache-flume-1.6.0-bin
   export PATH=$PATH:$FLUME_HOME/bin
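
    These exports only last for the current shell. To make them permanent, append them to a profile script and verify the installation; a minimal sketch, assuming /etc/profile is the right place on this machine:

    # Hedged sketch: persist FLUME_HOME and PATH (assuming /etc/profile is used)
    echo 'export FLUME_HOME=/root/apps/apache-flume-1.6.0-bin' >> /etc/profile
    echo 'export PATH=$PATH:$FLUME_HOME/bin' >> /etc/profile
    source /etc/profile
    flume-ng version    # should report Flume 1.6.0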

4、vi conf/netcat-logger.conf

    # Name the components of this agent
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1

    # Describe and configure the source component: r1
    a1.sources.r1.type = netcat
    a1.sources.r1.bind = hadoop1
    a1.sources.r1.port = 44444

    # Describe and configure the sink component: k1
    a1.sinks.k1.type = logger

    # Describe and configure the channel component; an in-memory buffer is used here
    a1.channels.c1.type = memory
    a1.channels.c1.capacity = 1000
    a1.channels.c1.transactionCapacity = 100

    # Wire the source, channel, and sink together
    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1
    
    # Start the Flume agent
    bin/flume-ng agent -c conf -f conf/netcat-logger.conf -n a1 -Dflume.root.logger=INFO,console
    # Install telnet
    yum -y install telnet
    # Connect and type lines; the logger sink prints each event on the agent console
    telnet hadoop1 44444
    
5、vi conf/spooldir-hdfs-sink.conf
    # Name the three components
    agent1.sources = source1
    agent1.sinks = sink1
    agent1.channels = channel1

    # Configure the source component
    agent1.sources.source1.type = spooldir
    agent1.sources.source1.spoolDir = /root/data/
    agent1.sources.source1.fileHeader = false

    # Configure the interceptor: the timestamp interceptor adds the timestamp header
    # that the %y-%m-%d/%H-%M escapes in hdfs.path below depend on
    agent1.sources.source1.interceptors = i1
    agent1.sources.source1.interceptors.i1.type = timestamp
    # Configure the sink component
    agent1.sinks.sink1.type = hdfs
    agent1.sinks.sink1.hdfs.path = /weblog/flume-collection/%y-%m-%d/%H-%M
    agent1.sinks.sink1.hdfs.filePrefix = access_log
    agent1.sinks.sink1.hdfs.maxOpenFiles = 5000
    agent1.sinks.sink1.hdfs.batchSize = 100
    agent1.sinks.sink1.hdfs.fileType = DataStream
    agent1.sinks.sink1.hdfs.writeFormat = Text
    # Roll files by size: 10240 bytes (10 KB)
    agent1.sinks.sink1.hdfs.rollSize = 10240
    # Roll files by event count: every 100 events
    agent1.sinks.sink1.hdfs.rollCount = 100
    # Roll files by time: every 60 seconds
    agent1.sinks.sink1.hdfs.rollInterval = 60
    # Enable rounding of the timestamp used in the directory path
    agent1.sinks.sink1.hdfs.round = true
    # Round down in steps of 10
    agent1.sinks.sink1.hdfs.roundValue = 10
    # The rounding unit is minutes
    agent1.sinks.sink1.hdfs.roundUnit = minute

    # Use a channel which buffers events in memory
    agent1.channels.channel1.type = memory
    agent1.channels.channel1.capacity = 500000
    agent1.channels.channel1.transactionCapacity = 600
    agent1.channels.channel1.keep-alive = 120

    # Bind the source and sink to the channel
    agent1.sources.source1.channels = channel1
    agent1.sinks.sink1.channel = channel1

    bin/flume-ng agent -c conf -f conf/spooldir-hdfs-sink.conf -n agent1  -Dflume.root.logger=INFO,console
    
    hdfs dfs -ls /weblog/flume-collection/18-08-07/23-00/
    
    hdfs dfs -cat /weblog/flume-collection/18-08-07/23-00/access_log.1533654126215
    
    /root/data/ is now being watched: any new file dropped into the directory is collected. Files must not be modified after they land there, and duplicate file names are not allowed.
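
    Because files in the spool directory must never change after arrival, a safe feeding pattern is to write the file elsewhere and move it in atomically; a minimal sketch (the file name page_views.log is made up for illustration):

    # Stage the file outside the spool dir, then move it in atomically
    echo "some log line" > /tmp/page_views.log
    mv /tmp/page_views.log /root/data/      # mv within one filesystem is atomic
    # once consumed, Flume renames the file to page_views.log.COMPLETED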
    

6、vi conf/exec-hdfs-sink.conf

    agent1.sources = source1
    agent1.sinks = sink1
    agent1.channels = channel1

    # Describe/configure tail -F source1
    agent1.sources.source1.type = exec
    agent1.sources.source1.command = tail -F /root/logs/access_log
    agent1.sources.source1.channels = channel1

    #configure interceptors for the source: the host interceptor fills the %{hostname}
    #escape in hdfs.path below, and the timestamp interceptor supplies %y-%m-%d/%H-%M
    agent1.sources.source1.interceptors = i1 i2
    agent1.sources.source1.interceptors.i1.type = host
    agent1.sources.source1.interceptors.i1.hostHeader = hostname
    #agent1.sources.source1.interceptors.i1.useIP = true
    agent1.sources.source1.interceptors.i2.type = timestamp

    # Describe sink1
    agent1.sinks.sink1.type = hdfs
    agent1.sinks.sink1.hdfs.path = hdfs://hadoop1:9000/file/%{hostname}/%y-%m-%d/%H-%M
    agent1.sinks.sink1.hdfs.filePrefix = access_log
    agent1.sinks.sink1.hdfs.batchSize = 100
    agent1.sinks.sink1.hdfs.fileType = DataStream
    agent1.sinks.sink1.hdfs.writeFormat = Text
    agent1.sinks.sink1.hdfs.rollSize = 10240
    agent1.sinks.sink1.hdfs.rollCount = 1000
    agent1.sinks.sink1.hdfs.rollInterval = 10
    agent1.sinks.sink1.hdfs.round = true
    agent1.sinks.sink1.hdfs.roundValue = 10
    agent1.sinks.sink1.hdfs.roundUnit = minute

    # Use a channel which buffers events in memory
    agent1.channels.channel1.type = memory
    agent1.channels.channel1.keep-alive = 120
    agent1.channels.channel1.capacity = 500000
    agent1.channels.channel1.transactionCapacity = 600

    # Bind the source and sink to the channel
    agent1.sources.source1.channels = channel1
    agent1.sinks.sink1.channel = channel1

    # Generate a test log file for the exec source to tail
    while true; do date >> /root/logs/access_log; sleep 0.5; done
    bin/flume-ng agent -c conf -f conf/exec-hdfs-sink.conf  -n agent1  -Dflume.root.logger=INFO,console
    Flume now uploads the tailed log data to HDFS.
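
    To verify, list the sink path on HDFS; the %{hostname} escape expands to one subdirectory per sending host (the hadoop1 subdirectory below assumes the events came from hadoop1):

    hdfs dfs -ls /file/
    hdfs dfs -ls /file/hadoop1/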

7、vi conf/test_regex.conf
    # Name the components of this agent
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1

    # Describe and configure the source component: r1
    a1.sources.r1.type = netcat
    a1.sources.r1.bind = hadoop1
    a1.sources.r1.port = 44444

    a1.sources.r1.interceptors = i4
    a1.sources.r1.interceptors.i4.type = REGEX_FILTER
    # Keep only records whose body contains the string "hadoop" or "spark"
    a1.sources.r1.interceptors.i4.regex = (hadoop)|(spark)
    # excludeEvents = false keeps matching events (true would drop them instead)
    a1.sources.r1.interceptors.i4.excludeEvents = false

    # Describe and configure the sink component: k1
    a1.sinks.k1.type = logger

    # Describe and configure the channel component; an in-memory buffer is used here
    a1.channels.c1.type = memory
    a1.channels.c1.capacity = 1000
    a1.channels.c1.transactionCapacity = 100

    # Wire the source, channel, and sink together
    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1

    bin/flume-ng agent -c conf -f conf/test_regex.conf  -n a1  -Dflume.root.logger=INFO,console
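
    A quick way to exercise the filter once the agent is up (the sample lines are made up): because excludeEvents is false, matching events are kept and everything else is dropped.

    telnet hadoop1 44444
    # typing "hadoop is great" -> matches (hadoop), printed by the logger sink
    # typing "spark streaming" -> matches (spark), printed by the logger sink
    # typing "hello world"     -> no match, silently discarded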
    
    
    
8、vi conf/send.conf
    #tail-avro-avro-logger.conf
    # Name the components on this agent
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1

    # Describe/configure the source
    a1.sources.r1.type = exec
    a1.sources.r1.command = tail -F /root/logs/test.log
    a1.sources.r1.channels = c1

    # Describe the sink
    ## The avro sink here acts as the data sender
    a1.sinks.k1.type = avro
    a1.sinks.k1.hostname = hadoop1
    a1.sinks.k1.port = 41414
    a1.sinks.k1.batch-size = 10

    # Use a channel which buffers events in memory
    a1.channels.c1.type = memory
    a1.channels.c1.capacity = 1000
    a1.channels.c1.transactionCapacity = 100

    # Bind the source and sink to the channel
    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1
    
    # Start the sender; start the receiver (receive.conf below) first, otherwise
    # the avro sink keeps retrying until the receiver comes up
    bin/flume-ng agent -c conf -f conf/send.conf -n a1 -Dflume.root.logger=INFO,console

    # Generate a test log file for the exec source to tail
    while true; do date >> /root/logs/test.log; sleep 0.5; done
    
    vi conf/receive.conf
    
    a1.sources = r1
    a1.sinks = s1
    a1.channels = c1

    ## The avro source here acts as the receiving server
    a1.sources.r1.type = avro
    a1.sources.r1.bind = 0.0.0.0
    a1.sources.r1.port = 41414

    a1.sinks.s1.type = hdfs
    a1.sinks.s1.hdfs.path = hdfs://hadoop1:9000/flumedata
    a1.sinks.s1.hdfs.filePrefix = access_log
    a1.sinks.s1.hdfs.batchSize = 100
    a1.sinks.s1.hdfs.fileType = DataStream
    a1.sinks.s1.hdfs.writeFormat = Text
    a1.sinks.s1.hdfs.rollSize = 10240
    a1.sinks.s1.hdfs.rollCount = 1000
    a1.sinks.s1.hdfs.rollInterval = 10
    a1.sinks.s1.hdfs.round = true
    a1.sinks.s1.hdfs.roundValue = 10
    a1.sinks.s1.hdfs.roundUnit = minute

    a1.channels.c1.type = memory
    a1.channels.c1.capacity = 1000
    a1.channels.c1.transactionCapacity = 100

    a1.sources.r1.channels = c1
    a1.sinks.s1.channel = c1
    
    bin/flume-ng agent -c conf -f conf/receive.conf  -n a1  -Dflume.root.logger=INFO,console
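
    With the receiver running, restart the sender if needed; events should now land under /flumedata on HDFS:

    hdfs dfs -ls /flumedata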


9、Flume high-availability (failover) configuration
#agent1 name
agent1.channels = c1
agent1.sources = r1
agent1.sinks = k1 k2

#set sink group
agent1.sinkgroups = g1
agent1.sinkgroups.g1.sinks = k1 k2

#set channel
agent1.channels.c1.type = memory
agent1.channels.c1.capacity = 1000
agent1.channels.c1.transactionCapacity = 100

agent1.sources.r1.channels = c1
agent1.sources.r1.type = exec
agent1.sources.r1.command = tail -F /root/log/test.log

#agent1.sources.r1.interceptors = i1 i2
#agent1.sources.r1.interceptors.i1.type = static
#agent1.sources.r1.interceptors.i1.key = Type
#agent1.sources.r1.interceptors.i1.value = LOGIN
#agent1.sources.r1.interceptors.i2.type = timestamp


# set sink1
agent1.sinks.k1.channel = c1
agent1.sinks.k1.type = avro
agent1.sinks.k1.hostname = hadoop2
agent1.sinks.k1.port = 52020

# set sink2
agent1.sinks.k2.channel = c1
agent1.sinks.k2.type = avro
agent1.sinks.k2.hostname = hadoop3       
agent1.sinks.k2.port = 52020

#set failover
agent1.sinkgroups.g1.processor.type = failover
agent1.sinkgroups.g1.processor.priority.k1 = 10
agent1.sinkgroups.g1.processor.priority.k2 = 5
agent1.sinkgroups.g1.processor.maxpenalty = 10000
#Declare the sink group first, then attach the two sinks k1 and k2, with priorities
#10 and 5; the processor's maxpenalty is set to 10000 ms (10 seconds; the default is 30 seconds).
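
For the failover to actually work, an avro source must be listening on port 52020 on both hadoop2 and hadoop3. Those collector agents are not shown in the notes above; a minimal sketch of one (the file name collector.conf and the /failover HDFS path are assumptions), to be run on each collector host:

# Hedged sketch: collector agent for hadoop2/hadoop3; collector.conf and the
# /failover HDFS path are illustrative names, not from the original notes
a2.sources = r1
a2.channels = c1
a2.sinks = s1

# Receive the events forwarded by agent1's avro sinks k1/k2
a2.sources.r1.type = avro
a2.sources.r1.bind = 0.0.0.0
a2.sources.r1.port = 52020

a2.channels.c1.type = memory
a2.channels.c1.capacity = 1000
a2.channels.c1.transactionCapacity = 100

# Write everything that arrives to HDFS as plain text
a2.sinks.s1.type = hdfs
a2.sinks.s1.hdfs.path = hdfs://hadoop1:9000/failover
a2.sinks.s1.hdfs.fileType = DataStream
a2.sinks.s1.hdfs.writeFormat = Text

a2.sources.r1.channels = c1
a2.sinks.s1.channel = c1

# Start on hadoop2 and on hadoop3:
# bin/flume-ng agent -c conf -f conf/collector.conf -n a2 -Dflume.root.logger=INFO,console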

