1.配置flume-conf.properties
# Flume agent "buttery": spooling-directory source feeding a Kafka channel.
# No sink is configured: the Kafka channel itself delivers events to the topic,
# which is a supported "sink-less" Flume topology.
buttery.sources = buttSource
buttery.channels = buttChannel

# --- Source: read completed files dropped into /home/flume/input, line by line ---
buttery.sources.buttSource.type = spooldir
buttery.sources.buttSource.spoolDir = /home/flume/input
buttery.sources.buttSource.deserializer = LINE
# Raise the per-line limit so long CSV rows are not truncated (default is 2048).
buttery.sources.buttSource.deserializer.maxLineLength = 320000
# Only pick up .csv files.
# FIX: the original pattern "^.\*.csv$" is wrong — java.util.Properties strips
# the lone backslash, producing regex "^.*.csv$", which also matches names like
# "fooXcsv". The double backslash below survives properties loading as "\.",
# so the final regex is "^.*\.csv$" (a literal dot before "csv").
buttery.sources.buttSource.includePattern = ^.*\\.csv$
# Attach the absolute path of the source file as an event header.
buttery.sources.buttSource.fileHeader = true
buttery.sources.buttSource.channels = buttChannel

# --- Channel: Kafka-backed channel (doubles as the delivery mechanism) ---
buttery.channels.buttChannel.type = org.apache.flume.channel.kafka.KafkaChannel
# Kafka cluster brokers.
buttery.channels.buttChannel.kafka.bootstrap.servers = IP1:9092,IP2:9092,IP3:9092
# Kafka topic the channel writes to.
buttery.channels.buttChannel.kafka.topic = AAA
# Do not parse records as Flume-event (Avro) format, because the same Kafka
# topic may also receive plain, non-Flume data from other producers.
buttery.channels.buttChannel.parseAsFlumeEvent = false
# Consumer group id, so each restart resumes from the previously committed offset.
buttery.channels.buttChannel.kafka.consumer.group.id = flume-consumer
# poll() timeout in milliseconds when consuming from the channel.
buttery.channels.buttChannel.pollTimeout = 1000
2.配置docker-compose.yml
# docker-compose for a single Flume agent container.
# FIX: the original paste had lost all YAML indentation, which makes the file
# invalid; the proper compose nesting is restored below.
version: '3.3'
services:
  flume:
    image: flume:1.9.0
    container_name: flume
    hostname: flume
    environment:
      # Where the agent looks for flume-conf.properties inside the container.
      - FLUME_CONF_DIR=/usr/flume/conf
      # Must match the agent name used as the property prefix ("buttery").
      - FLUME_AGENT_NAME=buttery
    ports:
      # Quoted to keep the "host:container" mappings as plain strings.
      - "5555:5555"
      - "6666:6666"
    volumes:
      # Hadoop client configs mounted read-through from ./conf.
      - ./conf/core-site.xml:/usr/hadoop/etc/hadoop/core-site.xml
      - ./conf/hdfs-site.xml:/usr/hadoop/etc/hadoop/hdfs-site.xml
      - ./conf/mapred-site.xml:/usr/hadoop/etc/hadoop/mapred-site.xml
      - ./conf/yarn-site.xml:/usr/hadoop/etc/hadoop/yarn-site.xml
      - ./conf/workers:/usr/hadoop/etc/hadoop/workers
      # The Flume agent configuration defined above.
      - ./conf/flume-conf.properties:/usr/flume/conf/flume-conf.properties
      # Spooling directory watched by the source, plus an output directory.
      - ./input:/home/flume/input
      - ./output:/home/flume/output
      - ./run.sh:/run.sh