// 0. Data
// Whitespace-separated sample input. The first non-blank line is the column
// header (user, date, item1, item2); each following line is one record.
// The literal begins and ends with a newline, so consumers trim it before splitting.
val data: String = """
user date item1 item2
1 2015-12-01 14 5.6
1 2015-12-01 10 0.6
1 2015-12-02 8 9.4
1 2015-12-02 90 1.3
2 2015-12-01 30 0.3
2 2015-12-01 89 1.2
2 2015-12-30 70 1.9
2 2015-12-31 20 2.5
3 2015-12-01 19 9.3
3 2015-12-01 40 2.3
3 2015-12-02 13 1.4
3 2015-12-02 50 1.0
3 2015-12-02 19 7.8
"""
// 1. Approach 1
// Parse the sample text into (user, fields) pairs, where fields holds
// date (String), item1 (Int) and item2 (Double), in that order.
// Blank lines are ignored and the first remaining line (the column header) is
// dropped, so the numeric conversions never see the column names.
val data2 = data.trim
  .split("\\n")
  .map(_.trim)
  .filter(_.nonEmpty)
  .drop(1)
  .map { line =>
    // Split the row once; each token is then addressed by column position.
    val f = line.split("\\s+")
    // Fully qualified because the file has no import for ListBuffer.
    (f(0), scala.collection.mutable.ListBuffer[Any](f(1), f(2).toInt, f(3).toDouble))
  }
// Goal of this pipeline: for every user, select the records that share the
// date of that user's "greatest" record, then print them.

// Distribute the parsed (user, fields) pairs; fields(0) is the date.
val data3 = sc.parallelize(data2)

// Per user, keep a single record carrying the greatest date.
// NOTE: dates are compared as strings, which orders chronologically only if
// they are zero-padded yyyy-MM-dd as in the sample data; confirm for real input.
val dataReduce = data3.reduceByKey { (x, y) =>
  if (x(0).toString >= y(0).toString) x else y
}

// Re-key every record by (user, date).
val dataUserAndDateKey = data3.map { rec =>
  ((rec._1, rec._2(0)), rec)
}

// Re-key each user's greatest-date representative the same way.
val dataReduceUserAndDateKey = dataReduce.map { rec =>
  ((rec._1, rec._2(0)), rec)
}

// Inner join on (user, date): only records whose date equals the user's
// greatest date find a partner and survive.
val joinData = dataUserAndDateKey.join(dataReduceUserAndDateKey)

// Collect to the driver before printing: rdd.foreach(println) runs on the
// executors, so in cluster mode nothing would appear on the driver's stdout.
// The result is small here, so collect() is safe.
joinData.collect().foreach(println)