flume接收http请求,并将数据写到kafka,spark消费kafka的数据。是数据采集的经典框架。
直接上flume的配置:
source : http
channel : file
sink : kafka
xx :~/software/flume1.8/conf$ cat http-file-kafka.conf
# http-file-kafka.conf: A single-node Flume configuration (HTTP source -> file channel -> Kafka sink)
##########
# Data example
# Send a POST request with a raw JSON body, e.g.: [{"body" : "{'xx':'xxxxx1'}"}]
# The example below follows the request demo from the official Flume documentation:
#[{
# "headers" : {
# "timestamp" : "434324343",
# "host" : "random_host.example.com",
# "topic" : "venn" # if headers contain topic, it will replace the default topic
# },
# "body" : "random_body" # random_body is the message sent to the channel
# }]
# Declare the components that make up agent1, in data-flow order: source -> channel -> sink
agent1.sources = s1
agent1.channels = c1
agent1.sinks = k1
# Describe/configure the source
# NOTE: Java properties files do not support trailing "# ..." comments; anything after
# the '=' (including a '#') becomes part of the value. Keep comments on their own lines.

# HTTP source: accepts events sent via HTTP POST.
agent1.sources.s1.type = http
# Hostname or IP address to listen on ("spring" is presumably this machine's hostname;
# adjust for your environment). Binding to localhost would only accept local requests.
agent1.sources.s1.bind = spring
# Port the HTTP source listens on.
agent1.sources.s1.port = 8084
# Built-in handler that turns the JSON request body into Flume events.
agent1.sources.s1.handler = org.apache.flume.source.http.JSONHandler
# Describe the sink
# NOTE: Java properties files do not support trailing "# ..." comments; anything after
# the '=' (including a '#') becomes part of the value. Keep comments on their own lines.

# Kafka sink: publishes events taken from the channel to a Kafka topic.
agent1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
# Default topic; an event whose headers contain "topic" is sent to that topic instead.
agent1.sinks.k1.kafka.topic = mytopic
# Kafka broker list as host:port (comma-separated for several brokers).
agent1.sinks.k1.kafka.bootstrap.servers = localhost:9092
# Number of events processed per batch. The sink reads this key WITHOUT the "kafka."
# prefix; "kafka.flumeBatchSize" is not a recognized sink property and would be ignored.
agent1.sinks.k1.flumeBatchSize = 20
# Properties under "kafka.producer." are passed through to the Kafka producer.
agent1.sinks.k1.kafka.producer.acks = 1
agent1.sinks.k1.kafka.producer.linger.ms = 1
# Compress produced messages with snappy.
agent1.sinks.k1.kafka.producer.compression.type = snappy
# Use a file channel (type = file) rather than a memory channel to buffer events
# between the HTTP source and the Kafka sink.
agent1.channels.c1.type = file
#agent1.channels.c1.capacity