版本说明
- apache-flume-1.6.0-cdh5.15.1-bin
- kafka_2.11-0.9.0.0
- scala-2.11.12
- apache-maven-3.3.9
- spark-2.4.4-bin-2.6.0-cdh5.15.1
- zookeeper-3.4.5-cdh5.15.1
处理流程
模拟日志生成
package com.hlsijx.spark;
import org.apache.log4j.Logger;
/**
 * Log generator: once started, it keeps emitting simulated log messages.
 *
 * <p>Every second it logs {@code value:<n>} at INFO level, where {@code n} is a
 * counter starting at 0. Where the messages end up is decided by the log4j
 * configuration on the classpath, not by this class.
 */
public class LoggerGenerator {

    /** Logger for this class; {@code final} because it is never reassigned. */
    private static final Logger logger = Logger.getLogger(LoggerGenerator.class);

    /**
     * Emits one log message per second until the process is stopped.
     *
     * @param args ignored
     * @throws InterruptedException if the thread is interrupted while sleeping
     */
    public static void main(String[] args) throws InterruptedException {
        // No loop condition on purpose: the generator runs until it is killed.
        for (int i = 0; ; i++) {
            logger.info("value:" + i);
            Thread.sleep(1000);
        }
    }
}
配置Flume
-
引入flume-ng-log4jappender
<dependency> <groupId>org.apache.flume.flume-ng-clients</groupId> <artifactId>flume-ng-log4jappender</artifactId> <version>1.6.0</version> </dependency>
-
配置avro_memory_kafka.conf
avro_memory_kafka.sources = avro_source
avro_memory_kafka.sinks = kafka_sink
avro_memory_kafka.channels = memory_channel

avro_memory_kafka.sources.avro_source.type = avro
avro_memory_kafka.sources.avro_source.bind = hlsijx
avro_memory_kafka.sources.avro_source.port = 44444

avro_memory_kafka.sinks.kafka_sink.type = org.apache.flume.sink.kafka.KafkaSink
avro_memory_kafka.sinks.kafka_sink.kafka.bootstrap.servers = hlsijx:9092
avro_memory_kafka.sinks.kafka_sink.kafka.topic = hello-spark
avro_memory_kafka.sinks.kafka_sink.flumeBatchSize = 5

avro_memory_kafka.channels.memory_channel.type = memory

avro_memory_kafka.sources.avro_source.channels = memory_channel
avro_memory_kafka.sinks.kafka_sink.channel = memory_channel
-
配置log4j.properties
log4j.rootLogger=info,stdout,flume

#console
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.target = System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern= %d{yyyy-MM-dd HH:mm:ss,SSS} [%t] [%c] [%p] - %m%n

#flume log4jappender
log4j.appender.flume = org.apache.flume.clients.log4jappender.Log4jAppender
log4j.appender.flume.Hostname = hlsijx
log4j.appender.flume.Port = 44444
log4j.appender.flume.UnsafeMode = true
-
启动Flume
flume-ng agent \
--conf $FLUME_HOME/conf \
--conf-file $FLUME_HOME/conf/avro_memory_kafka.conf \
--name avro_memory_kafka \
-Dflume.root.logger=INFO,console
配置Kafka
Kafka部署
package com.hlsijx.spark.stream.kafka
import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka._
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Spark Streaming job that reads from Kafka through a direct stream and, for
 * every 5-second batch, prints how many records that batch contained.
 */
object KafkaDirect {

  /**
   * Entry point.
   *
   * Example arguments: `hlsijx:9092 hello-spark`
   *
   * @param args exactly two values: the Kafka broker list (`host:port[,host:port...]`)
   *             and a comma-separated list of topics to consume
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // println (not print) so the message ends with a newline on the terminal.
      System.err.println("Usage: KafkaDirect <brokerList> <topic>")
      System.exit(1)
    }
    val Array(brokerList, topic) = args

    // Only fall back to local[2] when no master was supplied from outside;
    // a hard-coded setMaster would override `spark-submit --master`.
    val sparkConf = new SparkConf()
      .setAppName("KafkaDirect")
      .setIfMissing("spark.master", "local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(5))

    val kafkaParams = Map("metadata.broker.list" -> brokerList)
    // Tolerate "a, b" and trailing commas: trim each name and drop empty entries.
    val topics = topic.split(",").map(_.trim).filter(_.nonEmpty).toSet

    val directKafkaStream =
      KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
        ssc, kafkaParams, topics)

    // Counting does not need the message payload, so the (key, value) pairs are
    // counted directly instead of being mapped to their values first.
    val messageCount = directKafkaStream.count()
    messageCount.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
启动后,每个批次(5 秒)都会在控制台打印该批次从 Kafka 消费到的日志条数。