object Test2 {
  /**
   * Spark Streaming job: consumes tab-separated road-log records from the
   * Kafka topic "RoadRealTimeLog" in 5-second batches and prints, per batch,
   * (a) the number of distinct car ids and (b) the car ids that appear at
   * least twice together with their occurrence counts.
   *
   * Runs until externally terminated (awaitTermination blocks forever).
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("monitor_action_test2")
    val sc = new SparkContext(conf)
    sc.setLogLevel("WARN")
    val ssc = new StreamingContext(sc, Seconds(5))

    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hadoop-senior.test.com:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "use_a_separate_group_id_for_each_stream",
      "auto.offset.reset" -> "latest",
      // Offsets are not auto-committed; the stream starts from "latest" on restart.
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val topics = Array("RoadRealTimeLog")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    // The car id is field 3 (0-based) of a tab-separated record.
    // Guard against malformed/short records: indexing (3) directly used to
    // throw ArrayIndexOutOfBoundsException and abort the whole batch.
    // cache() because two independent output operations consume this stream;
    // without it every batch recomputes the Kafka read + parsing twice.
    val cars = stream
      .map(_.value().split("\t"))
      .filter(_.length > 3)
      .map(_(3))
      .cache()

    // Output 1: count of distinct car ids in the current 5-second batch.
    val distinctCarCount = cars.transform(_.distinct()).count()
    distinctCarCount.print()

    // Output 2: (carId, count) pairs for cars seen at least twice in a batch.
    val repeatedCars = cars.map((_, 1)).reduceByKey(_ + _).filter(_._2 >= 2)
    repeatedCars.print()

    ssc.start()
    ssc.awaitTermination()
  }
}