// This demo is entirely simulated.
// Download link for the data this demo needs:
// Link: https://pan.baidu.com/s/1KwBctUxE5AxfEBmiZfBmzQ  Extraction code: n3ft
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Simulated demo job: for each phone number, keeps the two base stations with the
 * largest summed signed timestamp and attaches each station's coordinates.
 *
 * Inputs (paths are hard-coded below):
 *  - log input: comma-separated lines read as `phone,timestamp,stationId,eventType`
 *  - station input: comma-separated lines read as `stationId,x,y`
 *
 * The result is printed to stdout and also written as text to the output path.
 *
 * NOTE(review): the summed value looks like the time a phone spent at a station
 * (event type "1" presumably marks the connection start, so its timestamp is
 * subtracted) -- confirm against the data description.
 */
object NetAndHome {

  private val LogPath = "E:\\bs_log"
  private val StationInfoPath = "E:\\lac_info.txt"
  private val OutputPath = "E:\\out"

  /**
   * Parses one log line.
   *
   * @param line comma-separated record; fields 0..3 are read as
   *             phone, timestamp, station id and event type
   * @return `((phone, stationId), signedTimestamp)`; the timestamp is negated
   *         when the event type is `"1"`
   * @throws NumberFormatException if the timestamp field is not a valid Long
   * @throws ArrayIndexOutOfBoundsException if the line has fewer than four fields
   */
  private def parseLogLine(line: String): ((String, String), Long) = {
    val fields = line.split(",")
    val timestamp = fields(1).toLong
    val signedTimestamp = if (fields(3) == "1") -timestamp else timestamp
    ((fields(0), fields(2)), signedTimestamp)
  }

  /**
   * Parses one station-info line.
   *
   * @param line comma-separated record; fields 0..2 are read as station id, x and y
   *             (longitude and latitude according to the original author's comment)
   * @return `(stationId, (x, y))`, with the coordinates kept as strings
   * @throws ArrayIndexOutOfBoundsException if the line has fewer than three fields
   */
  private def parseStationLine(line: String): (String, (String, String)) = {
    val fields = line.split(",")
    (fields(0), (fields(1), fields(2)))
  }

  /**
   * Runs the job on a local two-thread Spark master.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("NetAndHome").setMaster("local[2]")
    val sc = new SparkContext(conf)
    // try/finally so the context is stopped even when a stage fails
    // (e.g. the output directory already exists).
    try {
      // Sum the signed timestamps per (phone, station) pair.
      val totalByPhoneAndStation = sc.textFile(LogPath).map(parseLogLine).reduceByKey(_ + _)

      // Station id -> (x, y) lookup data.
      val stationCoords = sc.textFile(StationInfoPath).map(parseStationLine)

      // Re-key by station so the totals can be joined with the coordinates:
      // (station, (phone, total))
      val totalByStation = totalByPhoneAndStation.map {
        case ((phone, station), total) => (station, (phone, total))
      }

      /* Sample output recorded by the original author; its shape matches the raw join result:
       ArrayBuffer(
       (CC0710CC94ECC657A8561DE549D940E0,((18688888888,1300),(116.303955,40.041935))),
       (CC0710CC94ECC657A8561DE549D940E0,((18611132889,1900),(116.303955,40.041935))),
       (16030401EAFB68F1E3CDF819735E1C66,((18688888888,87600),(116.296302,40.032296))),
       (16030401EAFB68F1E3CDF819735E1C66,((18611132889,97500),(116.296302,40.032296))),
       (9F36407EAD0629FC166F14DDE7970F68,((18611132889,54000),(116.304864,40.050645))),
       (9F36407EAD0629FC166F14DDE7970F68,((18688888888,51200),(116.304864,40.050645))))*/

      // Flatten the join result to (phone, station, total, x, y).
      val located = totalByStation.join(stationCoords).map {
        case (station, ((phone, total), (x, y))) => (phone, station, total, x, y)
      }

      /* Sample output recorded by the original author; its shape matches the flattened records:
       ArrayBuffer(
       (18688888888,CC0710CC94ECC657A8561DE549D940E0,1300,116.303955,40.041935),
       (18611132889,CC0710CC94ECC657A8561DE549D940E0,1900,116.303955,40.041935),
       (18688888888,16030401EAFB68F1E3CDF819735E1C66,87600,116.296302,40.032296),
       (18611132889,16030401EAFB68F1E3CDF819735E1C66,97500,116.296302,40.032296),
       (18611132889,9F36407EAD0629FC166F14DDE7970F68,54000,116.304864,40.050645),
       (18688888888,9F36407EAD0629FC166F14DDE7970F68,51200,116.304864,40.050645))*/

      // Collect every record of a phone under one key.
      val byPhone = located.groupBy(_._1)

      /* Sample output recorded by the original author; its shape matches the grouped records:
       ArrayBuffer(
       (18688888888,
       CompactBuffer(
       (18688888888,CC0710CC94ECC657A8561DE549D940E0,1300,116.303955,40.041935),
       (18688888888,16030401EAFB68F1E3CDF819735E1C66,87600,116.296302,40.032296),
       (18688888888,9F36407EAD0629FC166F14DDE7970F68,51200,116.304864,40.050645))),
       (18611132889,
       CompactBuffer(
       (18611132889,CC0710CC94ECC657A8561DE549D940E0,1900,116.303955,40.041935),
       (18611132889,16030401EAFB68F1E3CDF819735E1C66,97500,116.296302,40.032296),
       (18611132889,9F36407EAD0629FC166F14DDE7970F68,54000,116.304864,40.050645))))*/

      // Keep the two records with the largest total per phone, largest first.
      // Cached because two actions (collect and saveAsTextFile) consume it; without
      // persistence the whole lineage would be computed twice.
      val topTwoPerPhone = byPhone
        .mapValues(records => records.toList.sortBy(_._3).reverse.take(2))
        .cache()

      /* Sample output recorded by the original author; its shape matches the final result:
       ArrayBuffer(
       (18688888888,
       List(
       (18688888888,16030401EAFB68F1E3CDF819735E1C66,87600,116.296302,40.032296),
       (18688888888,9F36407EAD0629FC166F14DDE7970F68,51200,116.304864,40.050645))),
       (18611132889,
       List(
       (18611132889,16030401EAFB68F1E3CDF819735E1C66,97500,116.296302,40.032296),
       (18611132889,9F36407EAD0629FC166F14DDE7970F68,54000,116.304864,40.050645))))
       */

      println(topTwoPerPhone.collect().toBuffer)
      topTwoPerPhone.saveAsTextFile(OutputPath)
    } finally {
      sc.stop()
    }
  }
}