一、采集说明
本采集方式通过 socket(TCP)通信向 Flume 发送数据,Flume 采集到数据后,将其分别存储到 HDFS 和 Kafka。
二、配置文件
1、source采集层
port.properties
# Components of the agent named "agent": one source, two channels, five sinks.
agent.sources = r1
agent.channels = c_kafka c_hdfs
agent.sinks = s_kafka_k1 s_kafka_k2 s_kafka_k3 s_hdfs_k1 s_hdfs_k2
# Sink groups. The prefix must be the same agent name used by every other
# property in this file ("agent"); a different prefix such as "agent1" belongs
# to another agent and is not applied to this one.
agent.sinkgroups = g_kafka g_hdfs
# Source r1: receives events over TCP using the syslog TCP source.
agent.sources.r1.type = syslogtcp
agent.sources.r1.bind = 10.0.2.6
agent.sources.r1.port = 44444
# r1 is attached to both channels (with no selector configured, Flume's
# default replicating selector is presumably in effect - confirm).
agent.sources.r1.channels = c_kafka c_hdfs
# Channel c_kafka: file-backed channel.
agent.channels.c_kafka.type=file
# On-disk locations for the checkpoint and the event data.
agent.channels.c_kafka.checkpointDir=/usr/local/flume-1.7.0-bin/spool/checkpoint
agent.channels.c_kafka.dataDirs=/usr/local/flume-1.7.0-bin/spool/data
# Sizing and timeouts.
# NOTE(review): units and support for write-timeout / checkpoint-timeout
# should be confirmed against the Flume version in use.
agent.channels.c_kafka.capacity=200000000
agent.channels.c_kafka.keep-alive=30
agent.channels.c_kafka.write-timeout=30
agent.channels.c_kafka.checkpoint-timeout=600
# set hdfs channels
agent.channels.c_hdfs