nginx,access.log日志约8000条/s,每100w条数据约253M,需要2min
agent1.sources = source1 agent1.sinks = sink1 agent1.channels = channel1 # Describe/configure spooldir source1 #agent1.sources.source1.type = spooldir #agent1.sources.source1.spoolDir = /var/log/apache/flumeSpool1 #agent1.sources.source1.fileHeader = true # Describe/configure tail -F source1 agent1.sources.source1.type = exec agent1.sources.source1.command = tail -F /tmp/log.log agent1.sources.source1.channels = channel1 # Describe/configure nc source1 #agent1.sources.source1.type = netcat #agent1.sources.source1.bind = localhost #agent1.sources.source1.port = 44444 #configure host for source agent1.sources.source1.interceptors = i1 agent1.sources.source1.interceptors.i1.type = host agent1.sources.source1.interceptors.i1.hostHeader = hostname # Describe sink1 #agent1.sinks.sink1.type = logger agent1.sinks.sink1.type = hdfs #a1.sinks.k1.channel = c1 #agent1.sinks.sink1.hdfs.path =hdfs://xxx:9000/tmp/tail/%y-%m-%d/%H%M%S agent1.sinks.sink1.hdfs.path =hdfs://xxx:9000/tmp/tail/%y-%m-%d/%H agent1.sinks.sink1.hdfs.filePrefix = %{hostname}/events- agent1.sinks.sink1.hdfs.maxOpenFiles = 5000 agent1.sinks.sink1.hdfs.batchSize= 500 agent1.sinks.sink1.hdfs.fileType = DataStream agent1.sinks.sink1.hdfs.writeFormat =Text agent1.sinks.sink1.hdfs.rollSize = 0 agent1.sinks.sink1.hdfs.rollCount = 1000000 agent1.sinks.sink1.hdfs.rollInterval = 600 #agent1.sinks.sink1.hdfs.round = true #agent1.sinks.sink1.hdfs.roundValue = 10 #agent1.sinks.sink1.hdfs.roundUnit = minute agent1.sinks.sink1.hdfs.useLocalTimeStamp = true # Use a channel which buffers events in memory agent1.channels.channel1.type = memory agent1.channels.channel1.keep-alive = 120 agent1.channels.channel1.capacity = 500000 agent1.channels.channel1.transactionCapacity = 600 # Bind the source and sink to the channel agent1.sources.source1.channels = channel1 agent1.sinks.sink1.channel = channel1