// Derives (oldVersion, newVersion) upgrade transitions per app/device key
// from a comma-separated log file.
//
// Input line layout (0-based fields): fields 0-3 form the record key,
// field 5 carries the version string — TODO(review): confirm field 4 is
// intentionally skipped and that every "v"-line has at least 6 fields,
// otherwise arr(5) throws ArrayIndexOutOfBoundsException.
val conf = new SparkConf().setAppName("App Update").setMaster("local")
val sc = new SparkContext(conf)

val rdd = sc.textFile("E:\\data\\spark\\rdd\\test\\read\\app.log")

// Keep only lines containing the version marker "v", then key each record
// by its first four fields with the version (field 5) as the value.
// split(..., -1) preserves trailing empty fields so indices stay stable.
val mapRdd = rdd
  .filter(line => line.contains("v"))
  .map { line =>
    val arr = line.split(",", -1)
    ((arr(0), arr(1), arr(2), arr(3)), arr(5))
  }

// Collect all versions seen per key, de-duplicate, and keep only keys with
// more than one distinct version — i.e. keys where an update actually occurred.
val keyRdd = mapRdd.groupByKey()
val filterRdd = keyRdd
  .mapValues(versions => versions.toList.distinct)
  .filter { case (_, versions) => versions.length > 1 }

// Pair each version with its successor in encounter order:
// List(v1, v2, v3) -> List((v1, v2), (v2, v3)).
val zipRdd = filterRdd.mapValues(versions => versions.zip(versions.tail))

// Flatten to one output row per transition:
// (field0, field1, field2, field3, oldVersion, newVersion).
val resultRdd = zipRdd.flatMap { case (key, transitions) =>
  transitions.map { case (oldVer, newVer) =>
    (key._1, key._2, key._3, key._4, oldVer, newVer)
  }
}

resultRdd.foreach(println)

// Release the driver's resources once the terminal action has run.
sc.stop()