标题好长……好复杂……
Flume + Kafka + Spark Streaming,应该说这一套架构已经成为流式计算的标配了。
如何整合我这里就不赘述了。
直接上几个配置文件好了,首先是 Flume agent 的配置:
# Flume agent "a1": declare the components this agent is built from --
# one source (r1), one channel (c1) and one sink (k1).
a1.sources = r1
a1.channels = c1
a1.sinks=k1
# Source r1: a TAILDIR source that tails the files listed in the file groups below.
a1.sources.r1.type = TAILDIR
# Events read by r1 are put into channel c1.
a1.sources.r1.channels = c1
# File in which the TAILDIR source records how far it has read in each tailed file
# (per the Flume TAILDIR documentation, a JSON file used to resume after a restart).
a1.sources.r1.positionFile = /work/onedesk/bidlog/apache-flume-1.7.0-bin/taildir_position.json
# Three file groups (f1, f2, f3); each one points at a single log file and tags
# its events with a "topic" header.
# NOTE(review): presumably the Kafka sink k1 uses the "topic" header to choose the
# destination Kafka topic -- confirm against the sink settings.
# NOTE(review): per the Flume docs the file-name part of a file group is a regular
# expression, so the "." in e.g. "bid.tmp" matches any character -- confirm that no
# unintended file names in this directory can match.
a1.sources.r1.filegroups = f1 f2 f3
# f1: bid log -> topic header "bid"
a1.sources.r1.filegroups.f1 = /work/onedesk/bidlog/bid.tmp
a1.sources.r1.headers.f1.topic = bid
# f2: SSP bid log -> topic header "sspbid"
a1.sources.r1.filegroups.f2 = /work/onedesk/bidlog/sspbid.tmp
a1.sources.r1.headers.f2.topic = sspbid
# f3: SSP click log -> topic header "sspclick"
a1.sources.r1.filegroups.f3 = /work/onedesk/bidlog/sspclick.tmp
a1.sources.r1.headers.f3.topic = sspclick
# Also attach a header holding the absolute path of the tailed file to every event.
a1.sources.r1.fileHeader = true
# Channel c1: in-memory event queue that buffers events coming from source r1.
# NOTE(review): a memory channel is not persisted, so events still buffered here
# are lost if the agent process stops -- confirm this is acceptable for these logs.
a1.channels.c1.type = memory
# Maximum number of events the channel can hold.
a1.channels.c1.capacity = 100000
# Maximum number of events moved in one transaction (one put into, or one take from, the channel).
a1.channels.c1.transactionCapacity = 1000
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink