/**
 * Spark job that sums income per shop and month, then attaches a running
 * (cumulative) total across each shop's months and prints the result.
 *
 * Every line of `data/shop.txt` is split on commas: field 0 is used as the
 * shop id, the first 7 characters of field 1 as the month key, and field 2 is
 * parsed as an integer amount.
 */
object RollUpMonthIncome {

  /**
   * Entry point: builds a local SparkContext, runs the aggregation and prints
   * tuples of (shop id, month, month income, cumulative income).
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sc = SparkUtils.createContext(true)
    // Always release the context, even if the job throws (e.g. on a malformed line).
    try {
      val lines = sc.textFile("data/shop.txt")
      // One record per (shop, month) holding that month's total income.
      val reduced: RDD[((String, String), Int)] = lines.map(parseLine).reduceByKey(_ + _)
      val res: RDD[(String, String, Int, Int)] = reduced
        .groupBy(_._1._1)
        .flatMap { case (sid, entries) => runningTotals(sid, entries) }
      println(res.collect().toBuffer)
    } finally {
      sc.stop()
    }
  }

  /** Parses one comma-separated line into ((shop id, month key), amount). */
  private def parseLine(line: String): ((String, String), Int) = {
    val fields = line.split(",")
    val sid = fields(0)
    // Keeps only the first 7 characters of the date field
    // (presumably a "yyyy-MM" prefix; confirm against the data file).
    val month = fields(1).substring(0, 7)
    val money = fields(2).toInt
    ((sid, month), money)
  }

  /** Orders one shop's monthly incomes by month key and pairs each with the cumulative sum so far. */
  private def runningTotals(
      sid: String,
      entries: Iterable[((String, String), Int)]
  ): List[(String, String, Int, Int)] = {
    // Month keys are compared as plain strings.
    val sorted = entries.toList.sortBy(_._1._2)
    // scanLeft yields the seed followed by every partial sum; drop the seed.
    // NOTE(review): sums are Int and can overflow for very large amounts.
    val totals = sorted.map(_._2).scanLeft(0)(_ + _).tail
    sorted.zip(totals).map { case (((_, month), money), total) =>
      (sid, month, money, total)
    }
  }
}
// Utility class
/** Helper for constructing a [[SparkContext]]. */
object SparkUtils {

  /**
   * Creates a new SparkContext whose application name is taken from this
   * object's class name.
   *
   * @param isLocal when true the master is set to `local[*]`; when false no
   *                master is set here (presumably it is supplied by the
   *                launcher; confirm for the deployment in use)
   * @return the newly constructed SparkContext
   */
  def createContext(isLocal: Boolean = false): SparkContext = {
    // The runtime class of a Scala object is named with a trailing '$';
    // strip it so the application name reads "SparkUtils".
    val appName = this.getClass.getSimpleName.stripSuffix("$")
    val conf = new SparkConf().setAppName(appName)
    if (isLocal) conf.setMaster("local[*]")
    new SparkContext(conf)
  }
}