Flume Installation

1、tar zxvf apache-flume-1.6.0-bin.tar.gz -C /root/apps/

2、cd /root/apps/apache-flume-1.6.0-bin/conf
    mv flume-env.sh.template flume-env.sh
    vi flume-env.sh
    export JAVA_HOME=/root/apps/jdk1.7.0_80

3、export FLUME_HOME=/root/apps/apache-flume-1.6.0-bin
   export PATH=$PATH:$FLUME_HOME/bin
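
    These exports only last for the current shell. To make them permanent, append them to a profile script and verify the installation; a minimal sketch, assuming /etc/profile is the right place on this machine:

    # Hedged sketch: persist FLUME_HOME and PATH (assuming /etc/profile is used)
    echo 'export FLUME_HOME=/root/apps/apache-flume-1.6.0-bin' >> /etc/profile
    echo 'export PATH=$PATH:$FLUME_HOME/bin' >> /etc/profile
    source /etc/profile
    flume-ng version    # should report Flume 1.6.0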

4、vi conf/netcat-logger.conf

    # Name the components of this agent
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1

    # Describe and configure the source component: r1
    a1.sources.r1.type = netcat
    a1.sources.r1.bind = hadoop1
    a1.sources.r1.port = 44444

    # Describe and configure the sink component: k1
    a1.sinks.k1.type = logger

    # Describe and configure the channel component; an in-memory buffer is used here
    a1.channels.c1.type = memory
    a1.channels.c1.capacity = 1000
    a1.channels.c1.transactionCapacity = 100

    # Wire the source, channel, and sink together
    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1
    
    # Start the Flume agent
    bin/flume-ng agent -c conf -f conf/netcat-logger.conf -n a1 -Dflume.root.logger=INFO,console
    # Install telnet
    yum -y install telnet
    # Connect and type lines; the logger sink prints each event on the agent console
    telnet hadoop1 44444
    
5、vi conf/spooldir-hdfs-sink.conf
    # Name the three components
    agent1.sources = source1
    agent1.sinks = sink1
    agent1.channels = channel1

    # Configure the source component
    agent1.sources.source1.type = spooldir
    agent1.sources.source1.spoolDir = /root/data/
    agent1.sources.source1.fileHeader = false

    # Configure the interceptor: the timestamp interceptor adds the timestamp header
    # that the %y-%m-%d/%H-%M escapes in hdfs.path below depend on
    agent1.sources.source1.interceptors = i1
    agent1.sources.source1.interceptors.i1.type = timestamp
    # Configure the sink component
    agent1.sinks.sink1.type = hdfs
    agent1.sinks.sink1.hdfs.path = /weblog/flume-collection/%y-%m-%d/%H-%M
    agent1.sinks.sink1.hdfs.filePrefix = access_log
    agent1.sinks.sink1.hdfs.maxOpenFiles = 5000
    agent1.sinks.sink1.hdfs.batchSize = 100
    agent1.sinks.sink1.hdfs.fileType = DataStream
    agent1.sinks.sink1.hdfs.writeFormat = Text
    # Roll files by size: 10240 bytes (10 KB)
    agent1.sinks.sink1.hdfs.rollSize = 10240
    # Roll files by event count: every 100 events
    agent1.sinks.sink1.hdfs.rollCount = 100
    # Roll files by time: every 60 seconds
    agent1.sinks.sink1.hdfs.rollInterval = 60
    # Enable rounding of the timestamp used in the directory path
    agent1.sinks.sink1.hdfs.round = true
    # Round down in steps of 10
    agent1.sinks.sink1.hdfs.roundValue = 10
    # The rounding unit is minutes
    agent1.sinks.sink1.hdfs.roundUnit = minute

    # Use a channel which buffers events in memory
    agent1.channels.channel1.type = memory
    agent1.channels.channel1.capacity = 500000
    agent1.channels.channel1.transactionCapacity = 600
    agent1.channels.channel1.keep-alive = 120

    # Bind the source and sink to the channel
    agent1.sources.source1.channels = channel1
    agent1.sinks.sink1.channel = channel1

    bin/flume-ng agent -c conf -f conf/spooldir-hdfs-sink.conf -n agent1  -Dflume.root.logger=INFO,console
    
    hdfs dfs -ls /weblog/flume-collection/18-08-07/23-00/
    
    hdfs dfs -cat /weblog/flume-collection/18-08-07/23-00/access_log.1533654126215
    
    /root/data/ is now being watched: any new file dropped into the directory is collected. Files must not be modified after they land there, and duplicate file names are not allowed.
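
    Because files in the spool directory must never change after arrival, a safe feeding pattern is to write the file elsewhere and move it in atomically; a minimal sketch (the file name page_views.log is made up for illustration):

    # Stage the file outside the spool dir, then move it in atomically
    echo "some log line" > /tmp/page_views.log
    mv /tmp/page_views.log /root/data/      # mv within one filesystem is atomic
    # once consumed, Flume renames the file to page_views.log.COMPLETED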
    

6、vi conf/exec-hdfs-sink.conf

    agent1.sources = source1
    agent1.sinks = sink1
    agent1.channels = channel1

    # Describe/configure tail -F source1
    agent1.sources.source1.type = exec
    agent1.sources.source1.command = tail -F /root/logs/access_log
    agent1.sources.source1.channels = channel1

    #configure interceptors for the source: the host interceptor fills the %{hostname}
    #escape in hdfs.path below, and the timestamp interceptor supplies %y-%m-%d/%H-%M
    agent1.sources.source1.interceptors = i1 i2
    agent1.sources.source1.interceptors.i1.type = host
    agent1.sources.source1.interceptors.i1.hostHeader = hostname
    #agent1.sources.source1.interceptors.i1.useIP = true
    agent1.sources.source1.interceptors.i2.type = timestamp

    # Describe sink1
    agent1.sinks.sink1.type = hdfs
    agent1.sinks.sink1.hdfs.path = hdfs://hadoop1:9000/file/%{hostname}/%y-%m-%d/%H-%M
    agent1.sinks.sink1.hdfs.filePrefix = access_log
    agent1.sinks.sink1.hdfs.batchSize = 100
    agent1.sinks.sink1.hdfs.fileType = DataStream
    agent1.sinks.sink1.hdfs.writeFormat = Text
    agent1.sinks.sink1.hdfs.rollSize = 10240
    agent1.sinks.sink1.hdfs.rollCount = 1000
    agent1.sinks.sink1.hdfs.rollInterval = 10
    agent1.sinks.sink1.hdfs.round = true
    agent1.sinks.sink1.hdfs.roundValue = 10
    agent1.sinks.sink1.hdfs.roundUnit = minute

    # Use a channel which buffers events in memory
    agent1.channels.channel1.type = memory
    agent1.channels.channel1.keep-alive = 120
    agent1.channels.channel1.capacity = 500000
    agent1.channels.channel1.transactionCapacity = 600

    # Bind the source and sink to the channel
    agent1.sources.source1.channels = channel1
    agent1.sinks.sink1.channel = channel1

    # Generate a test log file for the exec source to tail
    while true; do date >> /root/logs/access_log; sleep 0.5; done
    bin/flume-ng agent -c conf -f conf/exec-hdfs-sink.conf  -n agent1  -Dflume.root.logger=INFO,console
    Flume now uploads the tailed log data to HDFS.
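
    To verify, list the sink path on HDFS; the %{hostname} escape expands to one subdirectory per sending host (the hadoop1 subdirectory below assumes the events came from hadoop1):

    hdfs dfs -ls /file/
    hdfs dfs -ls /file/hadoop1/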

7、vi conf/test_regex.conf
    # Name the components of this agent
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1

    # Describe and configure the source component: r1
    a1.sources.r1.type = netcat
    a1.sources.r1.bind = hadoop1
    a1.sources.r1.port = 44444

    a1.sources.r1.interceptors = i4
    a1.sources.r1.interceptors.i4.type = REGEX_FILTER
    # Keep only records whose body contains the string "hadoop" or "spark"
    a1.sources.r1.interceptors.i4.regex = (hadoop)|(spark)
    # excludeEvents = false keeps matching events (true would drop them instead)
    a1.sources.r1.interceptors.i4.excludeEvents = false

    # Describe and configure the sink component: k1
    a1.sinks.k1.type = logger

    # Describe and configure the channel component; an in-memory buffer is used here
    a1.channels.c1.type = memory
    a1.channels.c1.capacity = 1000
    a1.channels.c1.transactionCapacity = 100

    # Wire the source, channel, and sink together
    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1

    bin/flume-ng agent -c conf -f conf/test_regex.conf  -n a1  -Dflume.root.logger=INFO,console
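
    A quick way to exercise the filter once the agent is up (the sample lines are made up): because excludeEvents is false, matching events are kept and everything else is dropped.

    telnet hadoop1 44444
    # typing "hadoop is great" -> matches (hadoop), printed by the logger sink
    # typing "spark streaming" -> matches (spark), printed by the logger sink
    # typing "hello world"     -> no match, silently discarded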
    
    
    
8、vi conf/send.conf
    #tail-avro-avro-logger.conf
    # Name the components on this agent
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1

    # Describe/configure the source
    a1.sources.r1.type = exec
    a1.sources.r1.command = tail -F /root/logs/test.log
    a1.sources.r1.channels = c1

    # Describe the sink
    ## The avro sink here acts as the data sender
    a1.sinks.k1.type = avro
    a1.sinks.k1.hostname = hadoop1
    a1.sinks.k1.port = 41414
    a1.sinks.k1.batch-size = 10

    # Use a channel which buffers events in memory
    a1.channels.c1.type = memory
    a1.channels.c1.capacity = 1000
    a1.channels.c1.transactionCapacity = 100

    # Bind the source and sink to the channel
    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1
    
    # Start the sender; start the receiver (receive.conf below) first, otherwise
    # the avro sink keeps retrying until the receiver comes up
    bin/flume-ng agent -c conf -f conf/send.conf -n a1 -Dflume.root.logger=INFO,console

    # Generate a test log file for the exec source to tail
    while true; do date >> /root/logs/test.log; sleep 0.5; done
    
    vi conf/receive.conf
    
    a1.sources = r1
    a1.sinks = s1
    a1.channels = c1

    ## The avro source here acts as the receiving server
    a1.sources.r1.type = avro
    a1.sources.r1.bind = 0.0.0.0
    a1.sources.r1.port = 41414

    a1.sinks.s1.type = hdfs
    a1.sinks.s1.hdfs.path = hdfs://hadoop1:9000/flumedata
    a1.sinks.s1.hdfs.filePrefix = access_log
    a1.sinks.s1.hdfs.batchSize = 100
    a1.sinks.s1.hdfs.fileType = DataStream
    a1.sinks.s1.hdfs.writeFormat = Text
    a1.sinks.s1.hdfs.rollSize = 10240
    a1.sinks.s1.hdfs.rollCount = 1000
    a1.sinks.s1.hdfs.rollInterval = 10
    a1.sinks.s1.hdfs.round = true
    a1.sinks.s1.hdfs.roundValue = 10
    a1.sinks.s1.hdfs.roundUnit = minute

    a1.channels.c1.type = memory
    a1.channels.c1.capacity = 1000
    a1.channels.c1.transactionCapacity = 100

    a1.sources.r1.channels = c1
    a1.sinks.s1.channel = c1
    
    bin/flume-ng agent -c conf -f conf/receive.conf  -n a1  -Dflume.root.logger=INFO,console
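
    With the receiver running, restart the sender if needed; events should now land under /flumedata on HDFS:

    hdfs dfs -ls /flumedata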


9、Flume high-availability (failover) configuration
#agent1 name
agent1.channels = c1
agent1.sources = r1
agent1.sinks = k1 k2

#set sink group
agent1.sinkgroups = g1
agent1.sinkgroups.g1.sinks = k1 k2

#set channel
agent1.channels.c1.type = memory
agent1.channels.c1.capacity = 1000
agent1.channels.c1.transactionCapacity = 100

agent1.sources.r1.channels = c1
agent1.sources.r1.type = exec
agent1.sources.r1.command = tail -F /root/log/test.log

#agent1.sources.r1.interceptors = i1 i2
#agent1.sources.r1.interceptors.i1.type = static
#agent1.sources.r1.interceptors.i1.key = Type
#agent1.sources.r1.interceptors.i1.value = LOGIN
#agent1.sources.r1.interceptors.i2.type = timestamp


# set sink1
agent1.sinks.k1.channel = c1
agent1.sinks.k1.type = avro
agent1.sinks.k1.hostname = hadoop2
agent1.sinks.k1.port = 52020

# set sink2
agent1.sinks.k2.channel = c1
agent1.sinks.k2.type = avro
agent1.sinks.k2.hostname = hadoop3       
agent1.sinks.k2.port = 52020

#set failover
agent1.sinkgroups.g1.processor.type = failover
agent1.sinkgroups.g1.processor.priority.k1 = 10
agent1.sinkgroups.g1.processor.priority.k2 = 5
agent1.sinkgroups.g1.processor.maxpenalty = 10000
#Declare the sink group first, then attach the two sinks k1 and k2, with priorities
#10 and 5; the processor's maxpenalty is set to 10000 ms (10 seconds; the default is 30 seconds).
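
For the failover to actually work, an avro source must be listening on port 52020 on both hadoop2 and hadoop3. Those collector agents are not shown in the notes above; a minimal sketch of one (the file name collector.conf and the /failover HDFS path are assumptions), to be run on each collector host:

# Hedged sketch: collector agent for hadoop2/hadoop3; collector.conf and the
# /failover HDFS path are illustrative names, not from the original notes
a2.sources = r1
a2.channels = c1
a2.sinks = s1

# Receive the events forwarded by agent1's avro sinks k1/k2
a2.sources.r1.type = avro
a2.sources.r1.bind = 0.0.0.0
a2.sources.r1.port = 52020

a2.channels.c1.type = memory
a2.channels.c1.capacity = 1000
a2.channels.c1.transactionCapacity = 100

# Write everything that arrives to HDFS as plain text
a2.sinks.s1.type = hdfs
a2.sinks.s1.hdfs.path = hdfs://hadoop1:9000/failover
a2.sinks.s1.hdfs.fileType = DataStream
a2.sinks.s1.hdfs.writeFormat = Text

a2.sources.r1.channels = c1
a2.sinks.s1.channel = c1

# Start on hadoop2 and on hadoop3:
# bin/flume-ng agent -c conf -f conf/collector.conf -n a2 -Dflume.root.logger=INFO,console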

