Flume Study Notes

1. Shipping data from Flume to HDFS

Upload apache-flume-1.9.0-bin.tar.gz to /opt/software on hadoop102

tar -zxvf apache-flume-1.9.0-bin.tar.gz -C /opt/module
cd /opt/module/
mv apache-flume-1.9.0 flume-1.9.0

Start HDFS:
hdp.sh start

# Run this script to simulate log production
# It creates a.log under /opt/module/datas and keeps appending to it
while true; do echo $RANDOM >> a.log; sleep 0.01; done
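Before writing any config, it is worth confirming the unpacked Flume runs at all; a quick check (assuming the install dir from above):

cd /opt/module/flume-1.9.0
bin/flume-ng version
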
1.1 Configure Flume: taildir-m-hdfs.conf
a1.sources = r1
a1.sinks = k1
a1.channels = c1

a1.sources.r1.channels = c1
a1.sources.r1.type = TAILDIR
a1.sources.r1.filegroups = g1
a1.sources.r1.filegroups.g1 = /opt/module/datas/a.*
a1.sources.r1.headers.g1.x = y
a1.sources.r1.fileHeader = true
a1.sources.r1.fileHeaderKey = filepath
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = timestamp
a1.sources.r1.interceptors.i1.headerName = timestamp

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 1000


a1.sinks.k1.channel = c1
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://hadoop102:8020/flumedata/%Y-%m-%d/%H
a1.sinks.k1.hdfs.filePrefix = mylog-
a1.sinks.k1.hdfs.fileSuffix = .log
a1.sinks.k1.hdfs.rollSize = 268435456
a1.sinks.k1.hdfs.rollInterval = 120
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.batchSize = 1000
a1.sinks.k1.hdfs.fileType = CompressedStream
a1.sinks.k1.hdfs.codeC = snappy
a1.sinks.k1.hdfs.useLocalTimeStamp = false
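One caveat about fileType = CompressedStream with codeC = snappy: the HDFS sink can only write snappy output if Hadoop's native snappy library is available on the Flume host. A quick way to check (a sketch, assuming the hadoop client is on the PATH):

hadoop checknative
# the output should list "snappy: true"; if it is false, fix the native libs or pick another codec
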
1.2 Start the Flume agent
[xx@hadoop102 flume-1.9.0]$ bin/flume-ng agent -c ./conf -f ./agentConf/taildir-m-hdfs.conf -n a1 -Dflume.root.logger=DEBUG,console
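Once the agent is tailing a.log, you can confirm that files are being rolled into the sink path (the date directories come from the %Y-%m-%d/%H escapes in hdfs.path):

hdfs dfs -ls -R /flumedata/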


Success!

2. Flume cascade example


2.1 Flume config on hadoop102: taildir-f-avrosink.conf
# First-tier agent config; every first-tier node uses the same config
a1.sources = r1
a1.sinks = k1
a1.channels = c1

a1.sources.r1.channels = c1
a1.sources.r1.type = TAILDIR
a1.sources.r1.filegroups = g1
a1.sources.r1.filegroups.g1 = /opt/module/datas/a.*
a1.sources.r1.fileHeader = false


a1.channels.c1.type = file


a1.sinks.k1.channel = c1
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = hadoop103
a1.sinks.k1.port = 4444
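Note that the file channel above runs on its defaults, so checkpoints and data files end up under ~/.flume of the user running the agent. In practice you would usually pin these directories explicitly; a sketch (the paths are assumptions):

a1.channels.c1.checkpointDir = /opt/module/flume-1.9.0/filechannel/checkpoint
a1.channels.c1.dataDirs = /opt/module/flume-1.9.0/filechannel/data
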
2.2 Flume config on hadoop103: avro-f-kfksink.conf
# Second-tier agent config
a1.sources = r1
a1.sinks = k1
a1.channels = c1


a1.sources.r1.channels = c1
a1.sources.r1.type = avro
a1.sources.r1.bind = hadoop103
a1.sources.r1.port = 4444
a1.sources.r1.batchSize = 100


a1.channels.c1.type = file


a1.sinks.k1.channel = c1
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.bootstrap.servers = hadoop102:9092,hadoop103:9092,hadoop104:9092
a1.sinks.k1.kafka.topic = doitedu17
a1.sinks.k1.kafka.producer.acks = 1
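Before starting this agent, the doitedu17 topic should already exist on the Kafka cluster (unless topic auto-creation is enabled). A sketch of creating it up front; the partition and replica counts are just assumptions:

bin/kafka-topics.sh --create --topic doitedu17 --partitions 3 --replication-factor 2 --zookeeper hadoop102:2181
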
2.3 Simulate log production

On hadoop102, in the /opt/module/datas directory:

while true; do echo $RANDOM >> a.log; sleep 0.01; done
2.4 Start ZooKeeper and Kafka
[xx@hadoop102 ~]$ zk.sh start
JMX enabled by default
Using config: /opt/module/zookeeper-3.4.6/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
JMX enabled by default
Using config: /opt/module/zookeeper-3.4.6/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
JMX enabled by default
Using config: /opt/module/zookeeper-3.4.6/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[xx@hadoop102 ~]$ kafka.sh start
 -------- Starting hadoop102 Kafka -------
 -------- Starting hadoop103 Kafka -------
 -------- Starting hadoop104 Kafka -------
[xx@hadoop102 ~]$ xcall.sh  jps
=======hadoop102=======
2130 Kafka
2233 Jps
1803 QuorumPeerMain
=======hadoop103=======
1776 QuorumPeerMain
2199 Jps
2105 Kafka
=======hadoop104=======
1771 QuorumPeerMain
2189 Jps
2094 Kafka

Basic Kafka commands

## list topics
bin/kafka-topics.sh --list --zookeeper doitedu01:2181

## create a topic
bin/kafka-topics.sh --create --topic topic2 --partitions 2 --replication-factor 2 --zookeeper doitedu01:2181

## start a console producer to produce data
bin/kafka-console-producer.sh --broker-list doitedu01:9092,doitedu02:9092,doitedu03:9092 --topic topic2
>hello tom

## start a console consumer to consume data
bin/kafka-console-consumer.sh --bootstrap-server doitedu01:9092,doitedu02:9092,doitedu03:9092 --topic topic2 --from-beginning
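
One more command that is often useful alongside these: describe a topic to see its partition and replica placement (same course-cluster hosts as above):

## describe a topic
bin/kafka-topics.sh --describe --topic topic2 --zookeeper doitedu01:2181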
2.5 Hands-on steps for the cascaded collection
1. Start tier 2 of the cascaded Flume setup (hadoop103)
[xx@hadoop103 flume-1.9.0]$ bin/flume-ng agent -c conf/ -f agentConf/avro-f-kfksink.conf -n a1 -Dflume.root.logger=DEBUG,console
2. Start tier 1 of the cascaded Flume setup (hadoop102)
[xx@hadoop102 flume-1.9.0]$ bin/flume-ng agent -c conf/ -f agentConf/taildir-f-avrosink.conf -n a1 -Dflume.root.logger=DEBUG,console
3. Check that the data reached Kafka by starting a consumer
[atguigu@hadoop104 kafka]$ bin/kafka-console-consumer.sh   --bootstrap-server hadoop102:9092 --topic doitedu17
26088
1901
15413
32216
18167
11155
2907
22079
1172
16349
7927
2059

Success!

3. Channel selector + custom interceptor: splitting the project's log stream


0. Write an interceptor
package com.zh.flume;

import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;

import java.util.List;

public class MultiplexingInterceptor implements Interceptor {
    private Integer flagfield = 0;
    private Integer timestampfield = 0;

    public MultiplexingInterceptor(Integer flagfield,Integer timestampfield) {
        this.flagfield = flagfield;
        this.timestampfield = timestampfield;
    }

    /**
     * Initialization work after the interceptor instance is constructed
     */
    public void initialize() {

    }

    // Log line format:
    // u01,ev1,mall,1568738583468
    public Event intercept(Event event) {
        // Use the event body and the flag field specified in the parameters to produce different header values
        byte[] body = event.getBody();
        String line = new String(body);

        String[] split = line.split(",");

        // Extract the business flag and add it to the headers
        event.getHeaders().put("flag",split[flagfield]);

        // Extract the behavior (event) timestamp and add it to the headers
        event.getHeaders().put("timestamp",split[timestampfield]);


        return event;
    }

    public List<Event> intercept(List<Event> list) {
        for (Event event : list) {
            intercept(event);
        }
        return list;
    }


    /**
     * Cleanup work before the interceptor is destroyed
     */
    public void close() {

    }


    public static class MultiplexingInterceptorBuilder implements Interceptor.Builder{

        Integer flagfield = 0;
        Integer timestampfield = 0;
        /**
         * Builds an interceptor instance
         * @return
         */
        public Interceptor build() {

            return new MultiplexingInterceptor(flagfield,timestampfield);
        }

        /**
         * Entry point for reading configuration parameters
         * @param context
         */
        public void configure(Context context) {
            flagfield = context.getInteger("flagfield");
            timestampfield = context.getInteger("timestampfield");

        }
    }
}
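
Before packaging, a quick local sanity check of the header stamping can look like the sketch below (the test class is made up; it uses Flume's EventBuilder and the field indexes 2 and 3 that the agent config will pass in):

package com.zh.flume;

import org.apache.flume.Event;
import org.apache.flume.event.EventBuilder;

public class MultiplexingInterceptorSmokeTest {
    public static void main(String[] args) {
        // one line in the log format used above: user,event,flag,timestamp
        Event e = EventBuilder.withBody("u01,ev1,mall,1568738583468".getBytes());
        MultiplexingInterceptor it = new MultiplexingInterceptor(2, 3);
        it.intercept(e);
        // expected headers: {flag=mall, timestamp=1568738583468}
        System.out.println(e.getHeaders());
    }
}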

Package the jar and upload it to Flume's lib directory

1. Script to simulate log generation:
while true
do
if [ $(($RANDOM % 2)) -eq 0 ]
then
echo "u$RANDOM,e1,waimai,`date +%s`000" >> a.log
else
echo "u$RANDOM,e1,mall,`date +%s`000" >> a.log
fi
sleep 0.2
done
2. Flume agent config: 2 channels, 1 selector, 1 custom interceptor
[xx@hadoop102 flume-1.9.0]$ vim agentConf/multiplexing.conf

a1.sources = r1
a1.channels = c1 c2
a1.sinks = k1 k2

a1.sources.r1.channels = c1 c2
a1.sources.r1.type = TAILDIR
a1.sources.r1.filegroups = g1
a1.sources.r1.filegroups.g1 = /opt/module/datas/a.*
a1.sources.r1.fileHeader = false

a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = com.zh.flume.MultiplexingInterceptor$MultiplexingInterceptorBuilder
a1.sources.r1.interceptors.i1.flagfield = 2
a1.sources.r1.interceptors.i1.timestampfield = 3


a1.sources.r1.selector.type = multiplexing
a1.sources.r1.selector.header = flag
a1.sources.r1.selector.mapping.mall = c1
a1.sources.r1.selector.mapping.waimai = c2
a1.sources.r1.selector.default = c2


a1.channels.c1.type = memory
a1.channels.c1.capacity = 2000
a1.channels.c1.transactionCapacity = 1000

a1.channels.c2.type = memory
a1.channels.c2.capacity = 2000
a1.channels.c2.transactionCapacity = 1000


a1.sinks.k1.channel = c1
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.bootstrap.servers = hadoop102:9092,hadoop103:9092,hadoop104:9092
a1.sinks.k1.kafka.topic = mall
a1.sinks.k1.kafka.producer.acks = 1

a1.sinks.k2.channel = c2
a1.sinks.k2.type = hdfs
a1.sinks.k2.hdfs.path = hdfs://hadoop102:8020/waimai/%Y-%m-%d/%H
a1.sinks.k2.hdfs.filePrefix = doitedu-log-
a1.sinks.k2.hdfs.fileSuffix = .log
a1.sinks.k2.hdfs.rollSize = 268435456
a1.sinks.k2.hdfs.rollInterval = 120
a1.sinks.k2.hdfs.rollCount = 0
a1.sinks.k2.hdfs.batchSize = 1000
a1.sinks.k2.hdfs.fileType = DataStream
a1.sinks.k2.hdfs.useLocalTimeStamp = false
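With this selector, only events whose flag header is mall go to the Kafka sink; waimai events (and anything unmatched) go to HDFS. As in the earlier example, the mall topic should exist before the agent starts; a sketch (partition/replica counts are assumptions):

bin/kafka-topics.sh --create --topic mall --partitions 3 --replication-factor 2 --zookeeper hadoop102:2181
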
3. Start HDFS, ZooKeeper, and Kafka
hdp.sh start
zk.sh start
kafka.sh start

[xx@hadoop102 ~]$ xcall.sh  jps
=======hadoop102=======
2000 NameNode
3093 Kafka
2139 DataNode
3196 Jps
2477 NodeManager
2765 QuorumPeerMain
2655 JobHistoryServer
=======hadoop103=======
2545 QuorumPeerMain
2023 ResourceManager
2874 Kafka
2970 Jps
1836 DataNode
2156 NodeManager
=======hadoop104=======
2528 Kafka
2625 Jps
2037 NodeManager
1830 DataNode
1946 SecondaryNameNode
2205 QuorumPeerMain

Helper scripts:

hdp.sh

#!/bin/bash
if [ $# -lt 1 ]
then
    echo "No Args Input..."
    exit ;
fi
case $1 in
"start"){
        echo " =================== 启动 hadoop集群 ==================="

        echo " --------------- 启动 hdfs ---------------"
        ssh hadoop102 "/opt/module/hadoop-3.1.3/sbin/start-dfs.sh"
        echo " --------------- 启动 yarn ---------------"
        ssh hadoop103 "/opt/module/hadoop-3.1.3/sbin/start-yarn.sh"
        echo " --------------- 启动 historyserver ---------------"
        ssh hadoop102 "/opt/module/hadoop-3.1.3/bin/mapred --daemon start historyserver"
};;
"stop"){
        echo " =================== 关闭 hadoop集群 ==================="

        echo " --------------- 关闭 yarn ---------------"
        ssh hadoop103 "/opt/module/hadoop-3.1.3/sbin/stop-yarn.sh"
        echo " --------------- 关闭 hdfs ---------------"
        ssh hadoop102 "/opt/module/hadoop-3.1.3/sbin/stop-dfs.sh"
        echo " --------------- 关闭 historyserver ---------------"
        ssh hadoop102 "/opt/module/hadoop-3.1.3/bin/mapred --daemon stop historyserver"
};;
*){
    echo "Input Args Error..."
};;
esac

zk.sh:

#!/bin/bash
case $1 in
"start") {
        for i in hadoop102 hadoop103 hadoop104
        do
                ssh $i "source /etc/profile; /opt/module/zookeeper-3.4.6/bin/zkServer.sh start"
        done
};;
"stop") {
        for i in hadoop102 hadoop103 hadoop104
        do
                ssh $i "source /etc/profile; /opt/module/zookeeper-3.4.6/bin/zkServer.sh stop"
        done
};;
"status") {
        for i in hadoop102 hadoop103 hadoop104
        do
                ssh $i "source /etc/profile; /opt/module/zookeeper-3.4.6/bin/zkServer.sh status"
        done
};;
esac

kafka.sh:

#! /bin/bash
case $1 in
"start"){
        for i in hadoop102 hadoop103 hadoop104
        do
                echo " --------启动 $i Kafka-------"
                ssh $i "source /etc/profile; /opt/module/kafka/bin/kafka-server-start.sh -daemon /opt/module/kafka/config/server.properties "
        done
};;
"stop"){
        for i in hadoop102 hadoop103 hadoop104
        do
                echo " --------停止 $i Kafka-------"
                ssh $i "source /etc/profile; /opt/module/kafka/bin/kafka-server-stop.sh stop"
        done
};;
esac

4. Start the Flume agent
[xx@hadoop102 flume-1.9.0]$ bin/flume-ng agent -c conf/ -f agentConf/multiplexing.conf -n a1 -Dflume.root.logger=DEBUG,console
5. Test: check the data in Kafka and HDFS
[xx@hadoop103 kafka]$ bin/kafka-console-consumer.sh --bootstrap-server hadoop102:9092,hadoop103:9092,hadoop104:9092 --topic mall
u27323,e1,mall,1616769650000
u27407,e1,mall,1616769650000
u31325,e1,mall,1616769651000
u5017,e1,mall,1616769651000
u4703,e1,mall,1616769651000
u26035,e1,mall,1616769651000
u2202,e1,mall,1616769652000
u16182,e1,mall,1616769652000
u30472,e1,mall,1616769653000
u15990,e1,mall,1616769653000
u32291,e1,mall,1616769654000
u1236,e1,mall,1616769654000
u17401,e1,mall,1616769654000
u22316,e1,mall,1616769654000


[atguigu@hadoop103 kafka]$ hdfs dfs -tail hdfs://hadoop102:8020/waimai/2021-03-26/22/doitedu-log-.1616769585211.log

u27887,e1,waimai,1616769691000
u4557,e1,waimai,1616769692000
u3,e1,waimai,1616769692000
u932,e1,waimai,1616769692000
u2070,e1,waimai,1616769692000
u17029,e1,waimai,1616769693000
u15891,e1,waimai,1616769693000
u17386,e1,waimai,1616769693000
u28482,e1,waimai,1616769693000
u19256,e1,waimai,1616769695000
u3694,e1,waimai,1616769695000
u22488,e1,waimai,1616769696000
u12503,e1,waimai,1616769696000
u963,e1,waimai,1616769696000
u20473,e1,waimai,1616769696000
u15916,e1,waimai,1616769697000
u15052,e1,waimai,1616769697000
u8609,e1,waimai,1616769697000
u14148,e1,waimai,1616769697000
u24441,e1,waimai,1616769698000

Success!

4. HA for the cascade aggregation tier


Cascade HA config, tier 1 (hadoop102): failover.conf
a1.sources = r1
a1.sinks = k1 k2
a1.channels = c1

a1.sources.r1.channels = c1
a1.sources.r1.type = TAILDIR
a1.sources.r1.filegroups = g1
a1.sources.r1.filegroups.g1 = /opt/module/datas/a.*
a1.sources.r1.fileHeader = false


a1.channels.c1.type = memory
a1.channels.c1.capacity = 2000
a1.channels.c1.transactionCapacity = 1000


a1.sinks.k1.channel = c1
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = hadoop103
a1.sinks.k1.port = 4444


a1.sinks.k2.channel = c1
a1.sinks.k2.type = avro
a1.sinks.k2.hostname = hadoop104
a1.sinks.k2.port = 4444


a1.sinkgroups = g1
a1.sinkgroups.g1.sinks = k1 k2
a1.sinkgroups.g1.processor.type = failover
a1.sinkgroups.g1.processor.priority.k1 = 200
a1.sinkgroups.g1.processor.priority.k2 = 100
a1.sinkgroups.g1.processor.maxpenalty = 5000
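With processor.type = failover, all events go to whichever live sink has the highest priority (k1 to hadoop103 here, priority 200); k2 only takes over while k1 is down, and maxpenalty caps the back-off (in ms) before a failed sink is retried. If the goal were to spread traffic across both collectors rather than keep one as a standby, the same sink group could use the load-balancing processor instead; a sketch:

a1.sinkgroups.g1.processor.type = load_balance
a1.sinkgroups.g1.processor.selector = round_robin
a1.sinkgroups.g1.processor.backoff = true
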
Cascade HA config, tier 2 (hadoop103): failover.conf
a1.sources = r1
a1.sinks = k1
a1.channels = c1


a1.sources.r1.channels = c1
a1.sources.r1.type = avro
a1.sources.r1.bind = hadoop103
a1.sources.r1.port = 4444
a1.sources.r1.batchSize = 100


a1.channels.c1.type = memory
a1.channels.c1.capacity = 2000
a1.channels.c1.transactionCapacity = 1000

a1.sinks.k1.channel = c1
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.bootstrap.servers = hadoop102:9092,hadoop103:9092,hadoop104:9092
a1.sinks.k1.kafka.topic = failover
a1.sinks.k1.kafka.producer.acks = 1
Cascade HA config, tier 2 (hadoop104): failover.conf
a1.sources = r1
a1.sinks = k1
a1.channels = c1


a1.sources.r1.channels = c1
a1.sources.r1.type = avro
a1.sources.r1.bind = hadoop104
a1.sources.r1.port = 4444
a1.sources.r1.batchSize = 100


a1.channels.c1.type = memory
a1.channels.c1.capacity = 2000
a1.channels.c1.transactionCapacity = 1000

a1.sinks.k1.channel = c1
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.bootstrap.servers = hadoop102:9092,hadoop103:9092,hadoop104:9092
a1.sinks.k1.kafka.topic = failover
a1.sinks.k1.kafka.producer.acks = 1

Simulate log production on hadoop102 under /opt/module/datas:

while true
do
if [ $(($RANDOM % 2)) -eq 0 ]
then
echo "u$RANDOM,e1,waimai,`date +%s`000" >> a.log
else
echo "u$RANDOM,e1,mall,`date +%s`000" >> a.log
fi
sleep 0.2
done

Copy Flume to hadoop104:

[xx@hadoop102 module]$ scp -r flume-1.9.0/ hadoop104:$PWD

Start the second-tier agents on hadoop103 and hadoop104:

bin/flume-ng agent -c conf/ -f agentConf/failover.conf -n a1 -Dflume.root.logger=DEBUG,console

Start the first-tier agent on hadoop102:

bin/flume-ng agent -c conf/ -f agentConf/failover.conf -n a1 -Dflume.root.logger=DEBUG,console

Consume the failover topic from Kafka to verify:

bin/kafka-console-consumer.sh --bootstrap-server \
hadoop102:9092,hadoop103:9092,hadoop104:9092 \
--topic failover

u21356,e1,waimai,1616817611000
u11451,e1,mall,1616817611000
u15814,e1,waimai,1616817612000
u20755,e1,waimai,1616817612000
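
To actually exercise the failover path, kill the hadoop103 agent while this consumer is running and check that events keep arriving via hadoop104 (a sketch; it assumes the Flume agent is the only process that jps reports as Application on hadoop103):

# on hadoop103
kill $(jps | awk '/Application/ {print $1}')
# the consumer should stall briefly, then continue once the first tier fails over to hadoop104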

Success!
