Spark consuming Kafka data
1. Import the Maven dependencies
<properties>
<spark.version>2.1.1</spark.version>
</properties>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
2. Create kafkaconsumer.properties
bootstrap.servers=hadoop102:9092,hadoop103:9092,hadoop104:9092
group.id=spark_0615
key.deserializer=org.apache.kafka.common.serialization.StringDeserializer
value.deserializer=org.apache.kafka.common.serialization.StringDeserializer
auto.offset.reset=earliest
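The step 3 code below hardcodes these settings; if you prefer to reuse this file, here is a minimal sketch of loading it into the Map[String, Object] that the Spark Kafka API expects (the loadKafkaParams helper is hypothetical, not part of the original notes):
import java.util.Properties
import scala.collection.JavaConverters._

// Hypothetical helper: load kafkaconsumer.properties from the classpath and
// expose it as the Map[String, Object] that ConsumerStrategies expects
def loadKafkaParams(resource: String): Map[String, Object] = {
  val props = new Properties()
  val in = Thread.currentThread().getContextClassLoader.getResourceAsStream(resource)
  props.load(in)
  in.close()
  props.asScala.toMap
}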
3. Consume the Kafka data
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.TopicPartition
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.{Seconds, StreamingContext}

val conf: SparkConf = new SparkConf().setAppName("KafkaTest").setMaster("local[*]")
// Cap the per-partition ingest rate, enable backpressure, and stop gracefully on shutdown
conf.set("spark.streaming.kafka.maxRatePerPartition", "100")
conf.set("spark.streaming.backpressure.enabled", "true")
conf.set("spark.streaming.stopGracefullyOnShutdown", "true")
val ssc = new StreamingContext(conf, Seconds(5))
val kafkaPara: Map[String, String] = Map(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "hadoop102:9092,hadoop103:9092,hadoop104:9092",
ConsumerConfig.GROUP_ID_CONFIG -> "bigdata",
ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer")
// Optionally resume from explicit offsets: topic "first", partition 0, starting at offset 1
val offsets = Map(new TopicPartition("first", 0) -> 1L)
val consumerStrategy: ConsumerStrategy[String, String] = ConsumerStrategies.Subscribe[String, String](List("first"), kafkaPara, offsets)
val inputDStream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](ssc, LocationStrategies.PreferConsistent, consumerStrategy)
var offsetRanges: Array[OffsetRange] = Array.empty[OffsetRange]
val lineDStream: DStream[String] = inputDStream.transform(rdd => {
offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
rdd.map(x => x.value())
})
lineDStream.print()
// offsetRanges is only populated per batch inside the streaming graph, so read
// it from an output operation rather than once at graph-construction time
lineDStream.foreachRDD { _ =>
offsetRanges.foreach(o => println(s"${o.topic}-${o.partition} untilOffset=${o.untilOffset}"))
}
ssc.start()
ssc.awaitTermination()
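In practice the captured ranges are committed back to Kafka once each batch's output has succeeded. A minimal sketch using the CanCommitOffsets handle exposed by the direct stream (this goes before ssc.start()):
// Persist each batch's offset ranges to the Kafka consumer group;
// commitAsync is provided by the 0-10 direct stream via CanCommitOffsets
lineDStream.foreachRDD { _ =>
  inputDStream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
}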
Flink consuming Kafka data
1. Import the Maven dependencies
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_2.11</artifactId>
<version>1.7.2</version>
</dependency>
2. Create kafkaconsumer.properties
bootstrap.servers=hadoop102:9092,hadoop103:9092,hadoop104:9092
group.id=flink_0615
key.deserializer=org.apache.kafka.common.serialization.StringDeserializer
value.deserializer=org.apache.kafka.common.serialization.StringDeserializer
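The code below calls ProPertiesUtil.getProperties, whose implementation is not shown in these notes; a minimal sketch of such a helper, assuming the file sits on the classpath:
import java.util.Properties

object ProPertiesUtil {
  // Load a .properties file from the classpath into a java.util.Properties
  def getProperties(resource: String): Properties = {
    val props = new Properties()
    val in = Thread.currentThread().getContextClassLoader.getResourceAsStream(resource)
    props.load(in)
    in.close()
    props
  }
}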
3. Consume the value data from Kafka
import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011

object FromKafkaSource {
def main(args: Array[String]): Unit = {
val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
// ProPertiesUtil is the custom helper sketched after step 2
val properties: Properties = ProPertiesUtil.getProperties("kafkaconsumer.properties")
val lineDataStream: DataStream[String] = env.addSource(new FlinkKafkaConsumer011[String]("flink", new SimpleStringSchema(), properties))
val wordToCountDataStream: DataStream[(String, Int)] = lineDataStream.flatMap(_.split(" ")).map((_, 1)).keyBy(0).sum(1)
wordToCountDataStream.print()
env.execute()
}
}
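FlinkKafkaConsumer011 also lets you pick the start position explicitly, and with checkpointing enabled it snapshots its offsets and recovers from them on failure. A short sketch building on the snippet above (the 5000 ms interval is just an example value):
// Enable checkpointing so the consumer's offsets are snapshotted and
// restored on failure (exactly-once source semantics)
env.enableCheckpointing(5000)

val consumer = new FlinkKafkaConsumer011[String]("flink", new SimpleStringSchema(), properties)
consumer.setStartFromGroupOffsets() // default; setStartFromEarliest() / setStartFromLatest() also available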
4. Consume the key-value data from Kafka
import java.util.Properties
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011
import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema

object FromKafkaSource {
def main(args: Array[String]): Unit = {
val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
val properties: Properties = ProPertiesUtil.getProperties("kafkaconsumer.properties")
// A KeyedDeserializationSchema exposes the message key as well as the value
val keyToValueDataStream: DataStream[(String, String)] = env.addSource(new FlinkKafkaConsumer011[(String, String)]("flink", new KeyedDeserializationSchema[(String, String)] {
override def isEndOfStream(nextElement: (String, String)): Boolean = {
false
}
override def deserialize(messageKey: Array[Byte], message: Array[Byte], topic: String, partition: Int, offset: Long): (String, String) = {
if (messageKey != null && message != null) {
val key = new String(messageKey, "UTF-8")
val value = new String(message, "UTF-8")
(key, value)
} else {
("null", "null")
}
}
override def getProducedType: TypeInformation[(String, String)] = {
createTuple2TypeInformation(createTypeInformation[String], createTypeInformation[String])
}
}, properties))
keyToValueDataStream.print()
env.execute()
}
}
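As a quick check that both fields come through, the tuple stream can feed the same word-count style aggregation as step 3, counting records per message key (illustrative only; this goes inside main before env.execute()):
// Count how many records arrived per Kafka message key
val countsByKey: DataStream[(String, Int)] = keyToValueDataStream.map(kv => (kv._1, 1)).keyBy(0).sum(1)
countsByKey.print()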