flume的数据源是自定义的NIOSource,数据写往两条线:一条写往HDFS,一条写往Kafka。
1. 示例一:flume-conf.properties
#############################################
c1.sources = c1so1
c1.channels = c1ch1 c1ch2
c1.sinks = c1si1 c1si2
c1.sources.c1so1.type=nio
c1.sources.c1so1.channels = c1ch1 c1ch2
c1.sources.c1so1.selector.type = replicating
c1.sources.c1so1.selector.optional = c1ch2
c1.sources.c1so1.bind = 10.0.15.71
c1.sources.c1so1.port = 8080
c1.sources.c1so1.delimiter=
c1.sources.c1so1.interceptors = i1 i2
c1.sources.c1so1.interceptors.i1.type = timestamp
c1.sources.c1so1.interceptors.i2.type = host
c1.channels.c1ch1.type = memory
c1.channels.c1ch1.keep-alive=30
c1.channels.c1ch1.byteCapacity=0
c1.channels.c1ch1.capacity = 100000
c1.channels.c1ch1.transactionCapacity = 100
c1.channels.c1ch2.type = memory
c1.channels.c1ch2.keep-alive=30
c1.channels.c1ch2.byteCapacity=0
c1.channels.c1ch2.capacity = 100000
c1.channels.c1ch2.transactionCapacity = 100
c1.sinks.c1si1.type = hdfs
c1.sinks.c1si1.channel = c1ch1
c1.sinks.c1si1.hdfs.path = hdfs://cloud40:9000/m2m/data01/%y-%m-%d
c1.sinks.c1si1.hdfs.filePrefix = log-%{host}
c1.sinks.c1si1.hdfs.idleTimeout=30
c1.sinks.c1si1.hdfs.callTimeout=300000
c1.sinks.c1si1.hdfs.rollCount=0
c1.sinks.c1si1.hdfs.rollSize=16000000
c1.sinks.c1si1.hdfs.rollInterval=0
c1.sinks.c1si1.hdfs.round = false
c1.sinks.c1si1.hdfs.roundValue = 10
c1.sinks.c1si1.hdfs.roundUnit = minute
c1.sinks.c1si1.hdfs.fileType = DataStream
c1.sinks.c1si1.hdfs.writeFormat = Text
c1.sinks.c1si1.hdfs.batchSize=100
c1.sinks.c1si1.serializer.appendNewline = true
c1.sinks.c1si2.type = org.apache.flume.plugins.KafkaSink
c1.sinks.c1si2.channel = c1ch2
c1.sinks.c1si2.metadata.broker.list=10.0.15.203:9092,10.0.15.204:9092,10.0.15.206:9092,10.0.15.207:9092,10.0.15.208:9092
c1.sinks.c1si2.serializer.class=kafka.serializer.StringEncoder
c1.sinks.c1si2.key.serializer.class=kafka.serializer.StringEncoder
c1.sinks.c1si2.request.required.acks=0
c1.sinks.c1si2.max.message.size=8000000
c1.sinks.c1si2.producer.type=sync
c1.sinks.c1si2.custom.encoding=UTF-8
c1.sinks.c1si2.custom.topic.name=p2p
#############################################
c2.sources = c2so1
c2.channels = c2ch1 c2ch2
c2.sinks = c2si1 c2si2
c2.sources.c2so1.type=nio
c2.sources.c2so1.channels = c2ch1 c2ch2
c2.sources.c2so1.selector.type = replicating
c2.sources.c2so1.selector.optional = c2ch2
c2.sources.c2so1.bind = 10.0.15.71
c2.sources.c2so1.port = 8081
c2.sources.c2so1.delimiter=
c2.sources.c2so1.interceptors = i1 i2
c2.sources.c2so1.interceptors.i1.type = timestamp
c2.sources.c2so1.interceptors.i2.type = host
c2.channels.c2ch1.type = memory
c2.channels.c2ch1.keep-alive=30
c2.channels.c2ch1.byteCapacity=0
c2.channels.c2ch1.capacity = 100000
c2.channels.c2ch1.transactionCapacity = 100
c2.channels.c2ch2.type = memory
c2.channels.c2ch2.keep-alive=30
c2.channels.c2ch2.byteCapacity=0
c2.channels.c2ch2.capacity = 100000
c2.channels.c2ch2.transactionCapacity = 100
c2.sinks.c2si1.type = hdfs
c2.sinks.c2si1.channel = c2ch1
c2.sinks.c2si1.hdfs.path = hdfs://cloud40:9000/m2m/data02/%y-%m-%d
c2.sinks.c2si1.hdfs.filePrefix = log-%{host}
c2.sinks.c2si1.hdfs.idleTimeout=30
c2.sinks.c2si1.hdfs.callTimeout=300000
c2.sinks.c2si1.hdfs.rollCount=0
c2.sinks.c2si1.hdfs.rollSize=16000000
c2.sinks.c2si1.hdfs.rollInterval=0
c2.sinks.c2si1.hdfs.round = false
c2.sinks.c2si1.hdfs.roundValue = 10
c2.sinks.c2si1.hdfs.roundUnit = minute
c2.sinks.c2si1.hdfs.fileType = DataStream
c2.sinks.c2si1.hdfs.writeFormat = Text
c2.sinks.c2si1.hdfs.batchSize=100
c2.sinks.c2si1.serializer.appendNewline = true
c2.sinks.c2si2.type = org.apache.flume.plugins.KafkaSink
c2.sinks.c2si2.channel = c2ch2
c2.sinks.c2si2.metadata.broker.list=10.0.15.203:9092,10.0.15.204:9092,10.0.15.206:9092,10.0.15.207:9092,10.0.15.208:9092
c2.sinks.c2si2.serializer.class=kafka.serializer.StringEncoder
c2.sinks.c2si2.key.serializer.class=kafka.serializer.StringEncoder
c2.sinks.c2si2.request.required.acks=0
c2.sinks.c2si2.max.message.size=8000000
c2.sinks.c2si2.producer.type=sync
c2.sinks.c2si2.custom.encoding=UTF-8
c2.sinks.c2si2.custom.topic.name=p2p
2. 示例二:flume-conf.properties
#############################################
c1.sources = c1so1
c1.channels = c1ch1 c1ch2
c1.sinks = c1si1 c1si2
c1.sources.c1so1.type=nio
c1.sources.c1so1.channels = c1ch1 c1ch2
c1.sources.c1so1.selector.type = replicating
c1.sources.c1so1.selector.optional = c1ch2
c1.sources.c1so1.bind = 10.0.15.70
c1.sources.c1so1.port = 8080
c1.sources.c1so1.delimiter=*
c1.sources.c1so1.interceptors = i1 i2
c1.sources.c1so1.interceptors.i1.type = timestamp
c1.sources.c1so1.interceptors.i2.type = host
c1.channels.c1ch1.type = memory
c1.channels.c1ch1.keep-alive=300
c1.channels.c1ch1.byteCapacity=0
c1.channels.c1ch1.capacity = 200000
c1.channels.c1ch1.transactionCapacity = 1000
c1.channels.c1ch2.type = memory
c1.channels.c1ch2.keep-alive=300
c1.channels.c1ch2.byteCapacity=0
c1.channels.c1ch2.capacity = 200000
c1.channels.c1ch2.transactionCapacity = 1000
c1.sinks.c1si1.type = hdfs
c1.sinks.c1si1.channel = c1ch1
c1.sinks.c1si1.hdfs.path = hdfs://cloud40:9000/m2m/default/%y-%m-%d
c1.sinks.c1si1.hdfs.filePrefix = log-%{host}
c1.sinks.c1si1.hdfs.idleTimeout=30
c1.sinks.c1si1.hdfs.callTimeout=300000
c1.sinks.c1si1.hdfs.rollCount=0
c1.sinks.c1si1.hdfs.rollSize=512000000
c1.sinks.c1si1.hdfs.rollInterval=0
c1.sinks.c1si1.hdfs.round = false
c1.sinks.c1si1.hdfs.roundValue = 10
c1.sinks.c1si1.hdfs.roundUnit = minute
c1.sinks.c1si1.hdfs.fileType = DataStream
c1.sinks.c1si1.hdfs.writeFormat = Text
c1.sinks.c1si1.hdfs.batchSize=1000
c1.sinks.c1si1.serializer.appendNewline = true
c1.sinks.c1si2.type = org.apache.flume.plugins.KafkaSink
c1.sinks.c1si2.channel = c1ch2
c1.sinks.c1si2.metadata.broker.list=10.0.15.203:9092,10.0.15.204:9092,10.0.15.206:9092,10.0.15.207:9092,10.0.15.208:9092
c1.sinks.c1si2.serializer.class=kafka.serializer.StringEncoder
c1.sinks.c1si2.key.serializer.class=kafka.serializer.StringEncoder
c1.sinks.c1si2.request.required.acks=0
c1.sinks.c1si2.max.message.size=8000000
c1.sinks.c1si2.producer.type=sync
c1.sinks.c1si2.custom.encoding=UTF-8
c1.sinks.c1si2.custom.topic.name=m2m
#############################################
c2.sources = c2so1
c2.channels = c2ch1 c2ch2
c2.sinks = c2si1 c2si2
c2.sources.c2so1.type=nio
c2.sources.c2so1.channels = c2ch1 c2ch2
c2.sources.c2so1.selector.type = replicating
c2.sources.c2so1.selector.optional = c2ch2
c2.sources.c2so1.bind = 10.0.15.70
c2.sources.c2so1.port = 8081
c2.sources.c2so1.delimiter=*
c2.sources.c2so1.interceptors = i1 i2
c2.sources.c2so1.interceptors.i1.type = timestamp
c2.sources.c2so1.interceptors.i2.type = host
c2.channels.c2ch1.type = memory
c2.channels.c2ch1.keep-alive=300
c2.channels.c2ch1.byteCapacity=0
c2.channels.c2ch1.capacity = 200000
c2.channels.c2ch1.transactionCapacity = 1000
c2.channels.c2ch2.type = memory
c2.channels.c2ch2.keep-alive=300
c2.channels.c2ch2.byteCapacity=0
c2.channels.c2ch2.capacity = 200000
c2.channels.c2ch2.transactionCapacity = 1000
c2.sinks.c2si1.type = hdfs
c2.sinks.c2si1.channel = c2ch1
c2.sinks.c2si1.hdfs.path = hdfs://cloud40:9000/m2m/data03/%y-%m-%d
c2.sinks.c2si1.hdfs.filePrefix = log-%{host}
c2.sinks.c2si1.hdfs.idleTimeout=30
c2.sinks.c2si1.hdfs.callTimeout=300000
c2.sinks.c2si1.hdfs.rollCount=0
c2.sinks.c2si1.hdfs.rollSize=512000000
c2.sinks.c2si1.hdfs.rollInterval=0
c2.sinks.c2si1.hdfs.round = false
c2.sinks.c2si1.hdfs.roundValue = 10
c2.sinks.c2si1.hdfs.roundUnit = minute
c2.sinks.c2si1.hdfs.fileType = DataStream
c2.sinks.c2si1.hdfs.writeFormat = Text
c2.sinks.c2si1.hdfs.batchSize=1000
c2.sinks.c2si1.serializer.appendNewline = true
c2.sinks.c2si2.type = org.apache.flume.plugins.KafkaSink
c2.sinks.c2si2.channel = c2ch2
c2.sinks.c2si2.metadata.broker.list=10.0.15.203:9092,10.0.15.204:9092,10.0.15.206:9092,10.0.15.207:9092,10.0.15.208:9092
c2.sinks.c2si2.serializer.class=kafka.serializer.StringEncoder
c2.sinks.c2si2.key.serializer.class=kafka.serializer.StringEncoder
c2.sinks.c2si2.request.required.acks=0
c2.sinks.c2si2.max.message.size=8000000
c2.sinks.c2si2.producer.type=sync
c2.sinks.c2si2.custom.encoding=UTF-8
c2.sinks.c2si2.custom.topic.name=m2m
启动方式:
nohup /opt/apache-flume-1.4.0-bin/bin/flume-ng agent -c /opt/apache-flume-1.4.0-bin/conf/ -n c1 -f /opt/apache-flume-1.4.0-bin/conf/flume-conf.properties &
nohup /opt/apache-flume-1.4.0-bin/bin/flume-ng agent -c /opt/apache-flume-1.4.0-bin/conf/ -n c2 -f /opt/apache-flume-1.4.0-bin/conf/flume-conf.properties &
1. 示例一:flume-conf.properties
#############################################
c1.sources = c1so1
c1.channels = c1ch1 c1ch2
c1.sinks = c1si1 c1si2
c1.sources.c1so1.type=nio
c1.sources.c1so1.channels = c1ch1 c1ch2
c1.sources.c1so1.selector.type = replicating
c1.sources.c1so1.selector.optional = c1ch2
c1.sources.c1so1.bind = 10.0.15.71
c1.sources.c1so1.port = 8080
c1.sources.c1so1.delimiter=
c1.sources.c1so1.interceptors = i1 i2
c1.sources.c1so1.interceptors.i1.type = timestamp
c1.sources.c1so1.interceptors.i2.type = host
c1.channels.c1ch1.type = memory
c1.channels.c1ch1.keep-alive=30
c1.channels.c1ch1.byteCapacity=0
c1.channels.c1ch1.capacity = 100000
c1.channels.c1ch1.transactionCapacity = 100
c1.channels.c1ch2.type = memory
c1.channels.c1ch2.keep-alive=30
c1.channels.c1ch2.byteCapacity=0
c1.channels.c1ch2.capacity = 100000
c1.channels.c1ch2.transactionCapacity = 100
c1.sinks.c1si1.type = hdfs
c1.sinks.c1si1.channel = c1ch1
c1.sinks.c1si1.hdfs.path = hdfs://cloud40:9000/m2m/data01/%y-%m-%d
c1.sinks.c1si1.hdfs.filePrefix = log-%{host}
c1.sinks.c1si1.hdfs.idleTimeout=30
c1.sinks.c1si1.hdfs.callTimeout=300000
c1.sinks.c1si1.hdfs.rollCount=0
c1.sinks.c1si1.hdfs.rollSize=16000000
c1.sinks.c1si1.hdfs.rollInterval=0
c1.sinks.c1si1.hdfs.round = false
c1.sinks.c1si1.hdfs.roundValue = 10
c1.sinks.c1si1.hdfs.roundUnit = minute
c1.sinks.c1si1.hdfs.fileType = DataStream
c1.sinks.c1si1.hdfs.writeFormat = Text
c1.sinks.c1si1.hdfs.batchSize=100
c1.sinks.c1si1.serializer.appendNewline = true
c1.sinks.c1si2.type = org.apache.flume.plugins.KafkaSink
c1.sinks.c1si2.channel = c1ch2
c1.sinks.c1si2.metadata.broker.list=10.0.15.203:9092,10.0.15.204:9092,10.0.15.206:9092,10.0.15.207:9092,10.0.15.208:9092
c1.sinks.c1si2.serializer.class=kafka.serializer.StringEncoder
c1.sinks.c1si2.key.serializer.class=kafka.serializer.StringEncoder
c1.sinks.c1si2.request.required.acks=0
c1.sinks.c1si2.max.message.size=8000000
c1.sinks.c1si2.producer.type=sync
c1.sinks.c1si2.custom.encoding=UTF-8
c1.sinks.c1si2.custom.topic.name=p2p
#############################################
c2.sources = c2so1
c2.channels = c2ch1 c2ch2
c2.sinks = c2si1 c2si2
c2.sources.c2so1.type=nio
c2.sources.c2so1.channels = c2ch1 c2ch2
c2.sources.c2so1.selector.type = replicating
c2.sources.c2so1.selector.optional = c2ch2
c2.sources.c2so1.bind = 10.0.15.71
c2.sources.c2so1.port = 8081
c2.sources.c2so1.delimiter=
c2.sources.c2so1.interceptors = i1 i2
c2.sources.c2so1.interceptors.i1.type = timestamp
c2.sources.c2so1.interceptors.i2.type = host
c2.channels.c2ch1.type = memory
c2.channels.c2ch1.keep-alive=30
c2.channels.c2ch1.byteCapacity=0
c2.channels.c2ch1.capacity = 100000
c2.channels.c2ch1.transactionCapacity = 100
c2.channels.c2ch2.type = memory
c2.channels.c2ch2.keep-alive=30
c2.channels.c2ch2.byteCapacity=0
c2.channels.c2ch2.capacity = 100000
c2.channels.c2ch2.transactionCapacity = 100
c2.sinks.c2si1.type = hdfs
c2.sinks.c2si1.channel = c2ch1
c2.sinks.c2si1.hdfs.path = hdfs://cloud40:9000/m2m/data02/%y-%m-%d
c2.sinks.c2si1.hdfs.filePrefix = log-%{host}
c2.sinks.c2si1.hdfs.idleTimeout=30
c2.sinks.c2si1.hdfs.callTimeout=300000
c2.sinks.c2si1.hdfs.rollCount=0
c2.sinks.c2si1.hdfs.rollSize=16000000
c2.sinks.c2si1.hdfs.rollInterval=0
c2.sinks.c2si1.hdfs.round = false
c2.sinks.c2si1.hdfs.roundValue = 10
c2.sinks.c2si1.hdfs.roundUnit = minute
c2.sinks.c2si1.hdfs.fileType = DataStream
c2.sinks.c2si1.hdfs.writeFormat = Text
c2.sinks.c2si1.hdfs.batchSize=100
c2.sinks.c2si1.serializer.appendNewline = true
c2.sinks.c2si2.type = org.apache.flume.plugins.KafkaSink
c2.sinks.c2si2.channel = c2ch2
c2.sinks.c2si2.metadata.broker.list=10.0.15.203:9092,10.0.15.204:9092,10.0.15.206:9092,10.0.15.207:9092,10.0.15.208:9092
c2.sinks.c2si2.serializer.class=kafka.serializer.StringEncoder
c2.sinks.c2si2.key.serializer.class=kafka.serializer.StringEncoder
c2.sinks.c2si2.request.required.acks=0
c2.sinks.c2si2.max.message.size=8000000
c2.sinks.c2si2.producer.type=sync
c2.sinks.c2si2.custom.encoding=UTF-8
c2.sinks.c2si2.custom.topic.name=p2p
2. 示例二:flume-conf.properties
#############################################
c1.sources = c1so1
c1.channels = c1ch1 c1ch2
c1.sinks = c1si1 c1si2
c1.sources.c1so1.type=nio
c1.sources.c1so1.channels = c1ch1 c1ch2
c1.sources.c1so1.selector.type = replicating
c1.sources.c1so1.selector.optional = c1ch2
c1.sources.c1so1.bind = 10.0.15.70
c1.sources.c1so1.port = 8080
c1.sources.c1so1.delimiter=*
c1.sources.c1so1.interceptors = i1 i2
c1.sources.c1so1.interceptors.i1.type = timestamp
c1.sources.c1so1.interceptors.i2.type = host
c1.channels.c1ch1.type = memory
c1.channels.c1ch1.keep-alive=300
c1.channels.c1ch1.byteCapacity=0
c1.channels.c1ch1.capacity = 200000
c1.channels.c1ch1.transactionCapacity = 1000
c1.channels.c1ch2.type = memory
c1.channels.c1ch2.keep-alive=300
c1.channels.c1ch2.byteCapacity=0
c1.channels.c1ch2.capacity = 200000
c1.channels.c1ch2.transactionCapacity = 1000
c1.sinks.c1si1.type = hdfs
c1.sinks.c1si1.channel = c1ch1
c1.sinks.c1si1.hdfs.path = hdfs://cloud40:9000/m2m/default/%y-%m-%d
c1.sinks.c1si1.hdfs.filePrefix = log-%{host}
c1.sinks.c1si1.hdfs.idleTimeout=30
c1.sinks.c1si1.hdfs.callTimeout=300000
c1.sinks.c1si1.hdfs.rollCount=0
c1.sinks.c1si1.hdfs.rollSize=512000000
c1.sinks.c1si1.hdfs.rollInterval=0
c1.sinks.c1si1.hdfs.round = false
c1.sinks.c1si1.hdfs.roundValue = 10
c1.sinks.c1si1.hdfs.roundUnit = minute
c1.sinks.c1si1.hdfs.fileType = DataStream
c1.sinks.c1si1.hdfs.writeFormat = Text
c1.sinks.c1si1.hdfs.batchSize=1000
c1.sinks.c1si1.serializer.appendNewline = true
c1.sinks.c1si2.type = org.apache.flume.plugins.KafkaSink
c1.sinks.c1si2.channel = c1ch2
c1.sinks.c1si2.metadata.broker.list=10.0.15.203:9092,10.0.15.204:9092,10.0.15.206:9092,10.0.15.207:9092,10.0.15.208:9092
c1.sinks.c1si2.serializer.class=kafka.serializer.StringEncoder
c1.sinks.c1si2.key.serializer.class=kafka.serializer.StringEncoder
c1.sinks.c1si2.request.required.acks=0
c1.sinks.c1si2.max.message.size=8000000
c1.sinks.c1si2.producer.type=sync
c1.sinks.c1si2.custom.encoding=UTF-8
c1.sinks.c1si2.custom.topic.name=m2m
#############################################
c2.sources = c2so1
c2.channels = c2ch1 c2ch2
c2.sinks = c2si1 c2si2
c2.sources.c2so1.type=nio
c2.sources.c2so1.channels = c2ch1 c2ch2
c2.sources.c2so1.selector.type = replicating
c2.sources.c2so1.selector.optional = c2ch2
c2.sources.c2so1.bind = 10.0.15.70
c2.sources.c2so1.port = 8081
c2.sources.c2so1.delimiter=*
c2.sources.c2so1.interceptors = i1 i2
c2.sources.c2so1.interceptors.i1.type = timestamp
c2.sources.c2so1.interceptors.i2.type = host
c2.channels.c2ch1.type = memory
c2.channels.c2ch1.keep-alive=300
c2.channels.c2ch1.byteCapacity=0
c2.channels.c2ch1.capacity = 200000
c2.channels.c2ch1.transactionCapacity = 1000
c2.channels.c2ch2.type = memory
c2.channels.c2ch2.keep-alive=300
c2.channels.c2ch2.byteCapacity=0
c2.channels.c2ch2.capacity = 200000
c2.channels.c2ch2.transactionCapacity = 1000
c2.sinks.c2si1.type = hdfs
c2.sinks.c2si1.channel = c2ch1
c2.sinks.c2si1.hdfs.path = hdfs://cloud40:9000/m2m/data03/%y-%m-%d
c2.sinks.c2si1.hdfs.filePrefix = log-%{host}
c2.sinks.c2si1.hdfs.idleTimeout=30
c2.sinks.c2si1.hdfs.callTimeout=300000
c2.sinks.c2si1.hdfs.rollCount=0
c2.sinks.c2si1.hdfs.rollSize=512000000
c2.sinks.c2si1.hdfs.rollInterval=0
c2.sinks.c2si1.hdfs.round = false
c2.sinks.c2si1.hdfs.roundValue = 10
c2.sinks.c2si1.hdfs.roundUnit = minute
c2.sinks.c2si1.hdfs.fileType = DataStream
c2.sinks.c2si1.hdfs.writeFormat = Text
c2.sinks.c2si1.hdfs.batchSize=1000
c2.sinks.c2si1.serializer.appendNewline = true
c2.sinks.c2si2.type = org.apache.flume.plugins.KafkaSink
c2.sinks.c2si2.channel = c2ch2
c2.sinks.c2si2.metadata.broker.list=10.0.15.203:9092,10.0.15.204:9092,10.0.15.206:9092,10.0.15.207:9092,10.0.15.208:9092
c2.sinks.c2si2.serializer.class=kafka.serializer.StringEncoder
c2.sinks.c2si2.key.serializer.class=kafka.serializer.StringEncoder
c2.sinks.c2si2.request.required.acks=0
c2.sinks.c2si2.max.message.size=8000000
c2.sinks.c2si2.producer.type=sync
c2.sinks.c2si2.custom.encoding=UTF-8
c2.sinks.c2si2.custom.topic.name=m2m
启动方式:
nohup /opt/apache-flume-1.4.0-bin/bin/flume-ng agent -c /opt/apache-flume-1.4.0-bin/conf/ -n c1 -f /opt/apache-flume-1.4.0-bin/conf/flume-conf.properties &
nohup /opt/apache-flume-1.4.0-bin/bin/flume-ng agent -c /opt/apache-flume-1.4.0-bin/conf/ -n c2 -f /opt/apache-flume-1.4.0-bin/conf/flume-conf.properties &
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/31347383/viewspace-2109815/,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/31347383/viewspace-2109815/