package cn.bw.spark.day02
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Computes, for every phone number, the base stations (LACs) where it stayed the longest.
 *
 * Event log input, one record per line: `phone,timestamp,lacId,status`
 *   - `timestamp` is formatted as `yyyyMMddHHmmss`
 *   - `status` is "1" when the phone enters the station, any other value when it leaves
 *
 * Station info input, one record per line: `lacId,longitude,latitude,...`
 *
 * Output (text files): `(phone, List((phone, lacId, staySeconds, longitude, latitude), ...))`,
 * keeping at most `TopN` stations per phone, longest stay first.
 *
 * Optional program arguments (the original hard-coded paths are used when they are omitted):
 *   args(0) event log path, args(1) station info path, args(2) output directory.
 */
object LacCompute {
  import java.time.{LocalDateTime, ZoneOffset}
  import java.time.format.DateTimeFormatter

  private val DefaultEventPath = "E:/test"
  private val DefaultStationPath = "E:/loc_info.txt"
  private val DefaultOutputPath = "D:/sparkTest3"

  /** Number of longest-stay stations kept per phone. */
  private val TopN = 3

  // Kept as a member of the object (not a local in main) so Spark closures reach it statically
  // instead of trying to serialize it; DateTimeFormatter is immutable and thread-safe.
  private val TimestampFormat: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMddHHmmss")

  /**
   * Converts a `yyyyMMddHHmmss` timestamp into seconds since the epoch.
   *
   * The raw digits must not be subtracted directly: 20160327170000 - 20160327082400 is 87600,
   * whereas the real elapsed time is 30960 seconds. UTC is used only as a fixed reference
   * offset, so differences are plain wall-clock differences.
   *
   * @param timestamp timestamp text in `yyyyMMddHHmmss` format
   * @return seconds since 1970-01-01T00:00:00 for that wall-clock time
   * @throws java.time.format.DateTimeParseException if the text does not match the format
   */
  private def toEpochSeconds(timestamp: String): Long =
    LocalDateTime.parse(timestamp, TimestampFormat).toEpochSecond(ZoneOffset.UTC)

  /** Returns `args(index)` when present, otherwise `default`. */
  private def argOrElse(args: Array[String], index: Int, default: String): String =
    if (args != null && args.length > index) args(index) else default

  def main(args: Array[String]): Unit = {
    val eventPath = argOrElse(args, 0, DefaultEventPath)
    val stationPath = argOrElse(args, 1, DefaultStationPath)
    val outputPath = argOrElse(args, 2, DefaultOutputPath)

    // local[3]: run locally with three worker threads.
    val conf = new SparkConf().setAppName("laccomputer").setMaster("local[3]")
    val sc = new SparkContext(conf)
    try {
      // ((phone, lacId), signedSeconds): entering is stored as a negative time and leaving as a
      // positive one, so summing per key yields (leave - enter), i.e. the stay in seconds.
      val signedTimes = sc.textFile(eventPath)
        .filter(_.trim.nonEmpty) // a blank line would otherwise fail on fields(1)
        .map { line =>
          val fields = line.split(",").map(_.trim)
          val phone = fields(0)
          val seconds = toEpochSeconds(fields(1))
          val lacId = fields(2)
          val status = fields(3)
          ((phone, lacId), if (status == "1") -seconds else seconds)
        }

      // (lacId, (phone, staySeconds)): total stay per phone and station, re-keyed by station id
      // so it can be joined with the station coordinates.
      val stayByStation = signedTimes
        .reduceByKey(_ + _)
        .map { case ((phone, lacId), staySeconds) => (lacId, (phone, staySeconds)) }

      // (lacId, (longitude, latitude))
      val stations = sc.textFile(stationPath)
        .filter(_.trim.nonEmpty)
        .map { line =>
          val fields = line.split(",").map(_.trim)
          (fields(0), (fields(1), fields(2)))
        }

      // Inner join on the station id; both sides must use the same key type (String here).
      // e.g. (phone, lacId, 30960, 116.296302, 40.032296)
      val located = stayByStation.join(stations).map {
        case (lacId, ((phone, staySeconds), (longitude, latitude))) =>
          (phone, lacId, staySeconds, longitude, latitude)
      }

      // Group by phone and keep the TopN stations with the longest stay, longest first.
      val topStations = located
        .groupBy(_._1)
        .mapValues(visits => visits.toList.sortBy(visit => -visit._3).take(TopN))

      topStations.saveAsTextFile(outputPath)
    } finally {
      // Always release the context, even when a stage fails.
      sc.stop()
    }
  }
}
代码如上
读取的文件如下:
"E:/test"
18688888888,20160327082400,16030401EAFB68F1E3CDF819735E1C66,1
18611132889,20160327082500,16030401EAFB68F1E3CDF819735E1C66,1
18688888888,20160327170000,16030401EAFB68F1E3CDF819735E1C66,0
18611132889,20160327180000,16030401EAFB68F1E3CDF819735E1C66,0
"E:/loc_info.txt"
9F36407EAD0629FC166F14DDE7970F68,116.304864,40.050645,6
CC0710CC94ECC657A8561DE549D940E0,116.303955,40.041935,6
16030401EAFB68F1E3CDF819735E1C66,116.296302,40.032296,6