Flume configuration files for collecting data to HDFS and Kafka
- Launch command
nohup bin/flume-ng agent -n a10 -c conf/ -f ./conf/server/flume-taildir-kafka.conf -Dflume.root.logger=INFO,console >> ./logs/flume-taildir-kafka.log 2>&1 &
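Because the agent detaches into the background, startup feedback only lands in the log file. A quick sanity check (paths follow the command above; adjust to your install):
tail -n 50 ./logs/flume-taildir-kafka.log
ps -ef | grep flume-ng | grep -v grep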
- Collecting log files to Kafka
flume-taildir-kafka.conf
#agent
a10.sources = r1
a10.channels = c1
#source
a10.sources.r1.type = TAILDIR
a10.sources.r1.filegroups = f1
#path of the log file to tail
a10.sources.r1.filegroups.f1 = /data1/onepiece-recommender/logs/statistics.log
#persists each file's read offset as JSON so tailing resumes where it left off instead of starting over (example contents shown after this config)
a10.sources.r1.positionFile = /usr/local/flume/logs/taildir_position_event_log.json
a10.sources.r1.fileHeader = false
#batchSize
a10.sources.r1.batchSize = 1000
#channel
a10.channels.c1.type = org.apache.flume.channel.kafka.KafkaChannel
a10.channels.c1.kafka.bootstrap.servers = 10.70.110.70:9092,10.70.110.71:9092,10.70.110.82:9092
a10.channels.c1.kafka.topic = es-service-cost-monitor
a10.channels.c1.kafka.consumer.group.id = flume-consumer-event-001
#store plain message bodies instead of Avro-wrapped Flume events, so non-Flume consumers can read the topic
a10.channels.c1.parseAsFlumeEvent = false
a10.channels.c1.kafka.producer.compression.type = lz4
a10.channels.c1.kafka.flumeBatchSize = 5000
#wait for all in-sync replicas to acknowledge, and batch sends for up to 30 ms
a10.channels.c1.kafka.producer.acks = all
a10.channels.c1.kafka.producer.linger.ms = 30
#bind the source to the channel
a10.sources.r1.channels = c1
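To confirm events are reaching the topic, a console consumer on any broker host will do; kafka-console-consumer.sh ships with Kafka, and the broker and topic values come from the channel settings above:
bin/kafka-console-consumer.sh --bootstrap-server 10.70.110.70:9092 --topic es-service-cost-monitor --from-beginning
The position file named by positionFile is plain JSON with one entry per tailed file; the inode and pos values below are illustrative:
cat /usr/local/flume/logs/taildir_position_event_log.json
# [{"inode":138196,"pos":1024,"file":"/data1/onepiece-recommender/logs/statistics.log"}]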
- Collecting log files to HDFS (note that tailed events pass through a Kafka channel, topic inference_raw_log, before the HDFS sink writes them out)
#agent
a10.sources = r1
a10.channels = c1
a10.sinks = k1
#source
a10.sources.r1.type = TAILDIR
a10.sources.r1.filegroups = f1
#path of the log file to tail
a10.sources.r1.filegroups.f1 = /usr/local/nginx/logs/action_data/access.log
#persists each file's read offset as JSON so tailing resumes where it left off instead of starting over
a10.sources.r1.positionFile = /home/user/apache-flume-1.9.0-bin/logs/taildir_position_event_log.json
a10.sources.r1.fileHeader = false
#batchSize
a10.sources.r1.batchSize = 1000
#channel
a10.channels.c1.type = org.apache.flume.channel.kafka.KafkaChannel
a10.channels.c1.kafka.bootstrap.servers = 10.10.100.124:9092
a10.channels.c1.kafka.topic = inference_raw_log
a10.channels.c1.kafka.consumer.group.id = flume-consumer-eventlog-001
a10.channels.c1.parseAsFlumeEvent = false
a10.channels.c1.kafka.producer.compression.type = lz4
a10.channels.c1.kafka.flumeBatchSize = 5000
a10.channels.c1.kafka.producer.acks = all
a10.channels.c1.kafka.producer.linger.ms = 30
#sink
a10.sinks.k1.type = hdfs
a10.sinks.k1.hdfs.path = hdfs://10.10.100.124:9000/data/action_log/%y%m%d/
a10.sinks.k1.hdfs.filePrefix = access_log
a10.sinks.k1.hdfs.maxOpenFiles = 5000
a10.sinks.k1.hdfs.batchSize = 100
#write plain text rather than SequenceFile
a10.sinks.k1.hdfs.fileType = DataStream
a10.sinks.k1.hdfs.writeFormat = Text
#roll a new file at 100 KB, 1,000,000 events, or 60 s, whichever comes first
a10.sinks.k1.hdfs.rollSize = 102400
a10.sinks.k1.hdfs.rollCount = 1000000
a10.sinks.k1.hdfs.rollInterval = 60
#round the path timestamp down to 10-minute boundaries, using the agent's local clock
a10.sinks.k1.hdfs.round = true
a10.sinks.k1.hdfs.roundValue = 10
a10.sinks.k1.hdfs.roundUnit = minute
a10.sinks.k1.hdfs.useLocalTimeStamp = true
#bind the source and sink to the channel
a10.sources.r1.channels = c1
a10.sinks.k1.channel = c1
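Once the agent is running, rolled files should show up under the dated directory; hdfs dfs is the stock HDFS CLI, and the 240101 date below (matching %y%m%d) is only an example:
hdfs dfs -ls hdfs://10.10.100.124:9000/data/action_log/240101/
hdfs dfs -cat hdfs://10.10.100.124:9000/data/action_log/240101/access_log* | head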