面试题描述:
实现代码如下:
package Test
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable.ArrayBuffer
/**
* Created by TG.
*/
/** Sample input data (one record per line, space-separated fields: user, location, time, seconds):
userA locationA 8 60
userA locationA 9 60
userB locationB 8 60
userA locationA 11 60
userA locationA 10 60
userA locationA 3 60
userA locationA 4 60
userA locationA 5 60
userA locationA 13 10
userB locationB 10 30
userB locationB 11 20
userB locationB 13 20
userB locationB 15 10
userB locationB 9 30
userA locationB 3 60
userA locationB 4 60
userA locationB 5 60
userA locationB 7 60
*/
/**
 * Spark job that, for every user/location pair, merges records whose time
 * values are consecutive into runs, then prints each run as
 * (lowest time value of the run, summed seconds of the run).
 *
 * Each non-blank input line must hold at least four whitespace-separated
 * fields: user, location, time (integer) and seconds (integer).
 */
object TestData {

  /** Input file that is read when no path is passed on the command line. */
  private val DefaultInputPath = "C:\\datas.txt"

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` overrides the input file path
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)
    val conf = new SparkConf().setMaster("local").setAppName("TestData")
    val sc = new SparkContext(conf)
    try {
      sc.textFile(inputPath)
        .filter(_.trim.nonEmpty) // blank lines carry no record; skip them instead of crashing
        .map(parseRecord)
        .groupByKey()
        .mapValues(mergeConsecutive)
        .foreach { case (userAndLocation, runs) =>
          println(userAndLocation)
          // toBuffer keeps the printed "ArrayBuffer(...)" form of the result.
          println(runs.toBuffer)
          println("=====================================")
        }
    } finally {
      // Always release the context, even when a stage fails.
      sc.stop()
    }
  }

  /**
   * Parses one input line into ("user-location", (time, seconds)).
   * Fields beyond the fourth are ignored.
   *
   * @throws IllegalArgumentException if the line has fewer than four fields
   * @throws NumberFormatException    if time or seconds is not an integer
   */
  private def parseRecord(line: String): (String, (Int, Int)) =
    line.trim.split("\\s+") match {
      case Array(user, location, time, seconds, _*) =>
        (s"$user-$location", (time.toInt, seconds.toInt))
      case _ =>
        throw new IllegalArgumentException(
          s"Malformed record, expected 'user location time seconds': '$line'")
    }

  /**
   * Collapses (time, seconds) records into runs of consecutive time values.
   *
   * Records are visited from the highest time value to the lowest. A record
   * extends the current run only when its time is exactly one less than the
   * previous record's time; any other gap (including a repeated time value)
   * starts a new run.
   *
   * @param records the (time, seconds) pairs of a single key
   * @return one (lowest time in run, total seconds) pair per run, ordered
   *         from the run with the highest time values to the lowest; empty
   *         when `records` is empty
   */
  private def mergeConsecutive(records: Iterable[(Int, Int)]): Vector[(Int, Int)] = {
    val descending = records.toVector.sortBy(_._1)(Ordering[Int].reverse)
    descending.foldLeft(Vector.empty[(Int, Int)]) { (runs, record) =>
      val (time, seconds) = record
      runs.lastOption match {
        // The last run's stored time is the time of the previously visited
        // record, so a difference of one means this record continues that run.
        case Some((previousTime, total)) if previousTime - time == 1 =>
          runs.init :+ ((time, total + seconds))
        case _ =>
          runs :+ record
      }
    }
  }
}
运行结果: