1.配置flume-conf.properties
# Flume agent "buttery": spooling-directory source feeding a Kafka channel.
# No sink is configured: the Kafka channel itself delivers events to the topic,
# which is a supported "sink-less" Flume topology.
buttery.sources = buttSource
buttery.channels = buttChannel

# --- Source: read completed files dropped into /home/flume/input, line by line ---
buttery.sources.buttSource.type = spooldir
buttery.sources.buttSource.spoolDir = /home/flume/input
buttery.sources.buttSource.deserializer = LINE
# Raise the per-line limit so long CSV rows are not truncated (default is 2048).
buttery.sources.buttSource.deserializer.maxLineLength = 320000
# Only pick up .csv files.
# FIX: the original pattern "^.\*.csv$" is wrong — java.util.Properties strips
# the lone backslash, producing regex "^.*.csv$", which also matches names like
# "fooXcsv". The double backslash below survives properties loading as "\.",
# so the final regex is "^.*\.csv$" (a literal dot before "csv").
buttery.sources.buttSource.includePattern = ^.*\\.csv$
# Attach the absolute path of the source file as an event header.
buttery.sources.buttSource.fileHeader = true
buttery.sources.buttSource.channels = buttChannel

# --- Channel: Kafka-backed channel (doubles as the delivery mechanism) ---
buttery.channels.buttChannel.type = org.apache.flume.channel.kafka.KafkaChannel
# Kafka cluster brokers.
buttery.channels.buttChannel.kafka.bootstrap.servers = IP1:9092,IP2:9092,IP3:9092
# Kafka topic the channel writes to.
buttery.channels.buttChannel.kafka.topic = AAA
# Do not parse records as Flume-event (Avro) format, because the same Kafka
# topic may also receive plain, non-Flume data from other producers.
buttery.channels.buttChannel.parseAsFlumeEvent = false
# Consumer group id, so each restart resumes from the previously committed offset.
buttery.channels.buttChannel.kafka.consumer.group.id = flume-consumer
# poll() timeout in milliseconds when consuming from the channel.
buttery.channels.buttChannel.pollTimeout = 1000
2.配置docker-compose.yml
# docker-compose for a single Flume agent container.
# FIX: the original paste had lost all YAML indentation, which makes the file
# invalid; the proper compose nesting is restored below.
version: '3.3'
services:
  flume:
    image: flume:1.9.0
    container_name: flume
    hostname: flume
    environment:
      # Where the agent looks for flume-conf.properties inside the container.
      - FLUME_CONF_DIR=/usr/flume/conf
      # Must match the agent name used as the property prefix ("buttery").
      - FLUME_AGENT_NAME=buttery
    ports:
      # Quoted to keep the "host:container" mappings as plain strings.
      - "5555:5555"
      - "6666:6666"
    volumes:
      # Hadoop client configs mounted read-through from ./conf.
      - ./conf/core-site.xml:/usr/hadoop/etc/hadoop/core-site.xml
      - ./conf/hdfs-site.xml:/usr/hadoop/etc/hadoop/hdfs-site.xml
      - ./conf/mapred-site.xml:/usr/hadoop/etc/hadoop/mapred-site.xml
      - ./conf/yarn-site.xml:/usr/hadoop/etc/hadoop/yarn-site.xml
      - ./conf/workers:/usr/hadoop/etc/hadoop/workers
      # The Flume agent configuration defined above.
      - ./conf/flume-conf.properties:/usr/flume/conf/flume-conf.properties
      # Spooling directory watched by the source, plus an output directory.
      - ./input:/home/flume/input
      - ./output:/home/flume/output
      - ./run.sh:/run.sh