// Sample records tagged "t1", shaped as (key, (tag, timestamp string)).
val tt1: Array[(String, (String, String))] = Array(
  ("1", ("t1", "2014-03-01 00:00:00.000+01")),
  ("1", ("t1", "2014-03-01 00:01:00.000+01")),
  ("2", ("t1", "2014-03-02 00:00:00.000+01")),
  ("2", ("t1", "2014-03-03 00:00:00.000+01")),
  ("2", ("t1", "2014-03-04 00:00:00.000+01")),
  ("3", ("t1", "2014-03-03 00:00:00.000+01"))
)

// Turn the local array into a keyed RDD.
// NOTE(review): `sc` is not defined in this snippet; presumably the SparkContext provided by spark-shell.
val tt1kv = sc.parallelize(tt1)
/**
 * Chooses between two pairs by comparing their second elements with
 * `String.compareTo` (plain lexicographic order).
 *
 * @param one first candidate pair
 * @param two second candidate pair
 * @return `one` only when its second element is strictly greater than that of
 *         `two`; otherwise (including ties) `two`
 */
def comparePair(one:(String,String), two:(String,String)): (String,String) =
  if (one._2.compareTo(two._2) <= 0) two else one
// Reduce each key to the single pair selected by comparePair and print the result on the driver.
for (row <- tt1kv.reduceByKey(comparePair).collect) println(row)
// Sample records tagged "t2", same (key, (tag, timestamp string)) shape as tt1.
val tt2: Array[(String, (String, String))] = Array(
  ("1", ("t2", "2014-03-01 00:02:00.000+01")),
  ("2", ("t2", "2014-03-02 00:02:00.000+01")),
  ("2", ("t2", "2014-03-03 00:02:00.000+01")),
  ("2", ("t2", "2014-03-04 00:02:00.000+01"))
)
val tt2kv = sc.parallelize(tt2)

// Per-key reduction of each data set with comparePair, then both results concatenated into one RDD.
val un = tt1kv.reduceByKey(comparePair).union(tt2kv.reduceByKey(comparePair))

// Print the combined records as they are.
for (row <- un.collect) println(row)

// Print the records grouped by key.
for (group <- un.groupByKey.collect) println(group)

// Print only the records whose tag is "t2".
for (row <- un.filter { case (_, (tag, _)) => tag == "t2" }.collect) println(row)

// Print only the keys that ended up with fewer than two records.
for (group <- un.groupByKey.filter { case (_, events) => events.toSeq.length < 2 }.collect) println(group)
/**
 * Tells whether the given pairs include at least one whose first element is
 * "t1" and at least one whose first element is "t2".
 *
 * @param events pairs whose first element is the tag being checked
 * @return true only when both tags are present; false otherwise, including
 *         for an empty sequence
 */
def haveAllTypes ( events:Seq[(String,String)] ): Boolean =
  // `exists` stops at the first match and avoids building filtered copies of the input.
  events.exists(_._1 == "t1") && events.exists(_._1 == "t2")
// Print only the keys whose grouped records satisfy haveAllTypes.
for (group <- un.groupByKey.filter { case (_, events) => haveAllTypes(events.toSeq) }.collect) println(group)
/**
 * Filter predicate that keeps a group unless its "t2" timestamp follows its
 * "t1" timestamp by less than `slaMillis`.
 *
 * The pairs are collapsed with `toMap`, so when a tag occurs more than once
 * the last occurrence is the one used. Timestamps are parsed with the pattern
 * "yyyy-MM-dd HH:mm:ss.SSSX", so the zone offset in the string is honoured.
 *
 * @param events    (tag, timestamp string) pairs belonging to one key
 * @param slaMillis minimum "t2" minus "t1" gap, in milliseconds, for the group
 *                  to be kept; defaults to the previously hard-coded 120000
 * @return true when either tag is missing or the gap is at least `slaMillis`;
 *         false when both tags are present and the gap is smaller
 * @throws java.text.ParseException if a "t1"/"t2" timestamp does not match the pattern
 */
def filterSLA ( events:Seq[(String,String)], slaMillis: Long = 120000L ): Boolean = {
  val eventMap = events.toMap
  (eventMap.get("t1"), eventMap.get("t2")) match {
    case (Some(t1Stamp), Some(t2Stamp)) =>
      // SimpleDateFormat is not thread-safe, so a fresh instance is built on every call.
      val format = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSX")
      val ts1 = format.parse(t1Stamp).getTime
      val ts2 = format.parse(t2Stamp).getTime
      ts2 - ts1 >= slaMillis
    case _ =>
      // Groups lacking either tag cannot be measured and are always kept.
      true
  }
}
// Print only the keys whose grouped records pass the filterSLA predicate.
for (group <- un.groupByKey.filter { case (_, events) => filterSLA(events.toSeq) }.collect) println(group)
Spark 实战笔记 case 1
最新推荐文章于 2019-02-22 15:50:42 发布