flume导入文件到Kafka
创建新主题存放此次消息
# Create the topic that will hold the imported rows (1 partition, no replication).
kafka-topics.sh --create \
  --topic train_row \
  --partitions 1 \
  --replication-factor 1 \
  --zookeeper 192.168.153.133:2181
配置flume文件,执行
# Start the Flume agent "events" defined in train.conf, logging INFO to the console.
flume-ng agent \
  --name events \
  --conf /opt/soft/flume160/conf/ \
  --conf-file /opt/soft/flume160/conf/kb15conf/train.conf \
  -Dflume.root.logger=INFO,console
train.conf 文件内容如下:
# Flume agent "events": spooldir source -> file channel -> Kafka sink.
events.sources=eventsSource
events.channels=eventsChannel
events.sinks=eventsSink

# Source: watch a directory; new files matching includePattern are ingested continuously.
events.sources.eventsSource.type=spooldir
events.sources.eventsSource.spoolDir=/opt/kb15tmp/flumelogfile/train
# One Flume event per input line.
events.sources.eventsSource.deserializer=LINE
# Maximum length of a single line/event.
events.sources.eventsSource.deserializer.maxLineLength=320000
# Only pick up files named train_YYYY-MM-DD.csv ('\.' matches a literal dot).
events.sources.eventsSource.includePattern=train_[0-9]{4}-[0-9]{2}-[0-9]{2}\.csv
# BUGFIX: the interceptor chain must be declared, otherwise the
# head_filter.* properties below are silently ignored by Flume and the
# CSV header line is never filtered out.
events.sources.eventsSource.interceptors=head_filter
# Drop the CSV header row (lines starting with "User").
events.sources.eventsSource.interceptors.head_filter.type=regex_filter
events.sources.eventsSource.interceptors.head_filter.regex=^User.*
events.sources.eventsSource.interceptors.head_filter.excludeEvents=true

# Channel: durable file channel (separate checkpoint and data directories).
events.channels.eventsChannel.type=file
events.channels.eventsChannel.checkpointDir=/opt/kb15tmp/checkpoint/train
events.channels.eventsChannel.dataDirs=/opt/kb15tmp/checkpoint/data/train

# Sink: publish each event to the Kafka topic train_row.
# brokerList/topic are the Flume 1.6 KafkaSink property names
# (renamed kafka.bootstrap.servers/kafka.topic in Flume 1.7+).
events.sinks.eventsSink.type=org.apache.flume.sink.kafka.KafkaSink
events.sinks.eventsSink.batchSize=640
events.sinks.eventsSink.brokerList=192.168.153.133:9092
events.sinks.eventsSink.topic=train_row

# Wire source and sink to the channel.
events.sources.eventsSource.channels=eventsChannel
events.sinks.eventsSink.channel=eventsChannel
创建消费者
# Consume the topic from the beginning to verify the data landed in Kafka.
kafka-console-consumer.sh \
  --bootstrap-server chen133:9092 \
  --topic train_row \
  --from-beginning
数据存入Kafka