获取每辆车的运行轨迹
1 1
1 2
1 3
1 1
2 1
2 3
2 4
================================
(1,[1,2,3,1])
(2,[1,3,4])
concat_ws('|',collect_list(road_id))
(1,1|2|3|1)
(2,1|3|4)
val sql1 = """select car,concat_ws(',',collect_list(road_id)) roads
             from (
               select car,road_id,action_time
               from traffic.monitor_flow_action order by action_time
             ) t group by car"""
val rdd1 = spark.sql(sql1).rdd
获取指定道路段转换的车流量
比如,指定[1,2,3,4,5]几条道路,需要统计
(1--->2,车牌数)
(1--->2--->3,车牌数)
(1--->2--->3--->4,车牌数)
(1--->2--->3--->4--->5,车牌数)
//指定[1,2,3,4,5]几条道路
val roadFlow = args(1)
val roadFlowBroad = spark.sparkContext.broadcast(roadFlow)
val rdd2 = rdd1.flatMap(row => {
val restRoad = row.getString(1)
// 从广播变量中得到指定的卡扣流参数
val roadparams = roadFlowBroad.value
// 对指定的卡扣流参数分割
val splited = roadparams.split(",")
// 用一个list来接受最终的结果
val goodList = new ListBuffer[(String,Long)]
// 遍历切割出来的数组,组成kv格式。k是卡扣id拼接,v是这辆车匹配到卡扣切片的次数
//1,2,3,4
for(i <- 1 to splited.length) {
var tmpRoad = ""
//1,2
//1,2,3
//1,2,3,4
for (j <- 0 until i) {
tmpRoad = tmpRoad + "," + splited(j)
}
tmpRoad = tmpRoad.substring(1)
// 从什么位置开始匹配
var index = 0
// 计数
var count = 0
// 循环匹配,获取指定道路段车流数
while (restRoad.indexOf(tmpRoad, index) != -1) {
index = restRoad.indexOf(tmpRoad, index) + 1
count += 1
}
//(道路段,车流数)
//(1--->2,xxx)
//(1--->2--->3,xxx)
//(1--->2--->3--->4,xxx)
goodList.+=((tmpRoad, count))
}
goodList
})
val rdd3 = rdd2.reduceByKey(_+_)
val map = rdd3.collectAsMap()
获取指定道路段车流转换率
// 计算转换率,保留两位小数
//1-->2-->3 / 1-->2
//1-->2-->3-->4 / 1-->2-->3
//1-->2-->3-->4-->5 / 1-->2-->3-->4
// 得到广播变量中的值 ,并切割
//1,2,3,4,5
val value = roadFlowBroad.value
val splits = value.split(",")
// 求转化率,把结果放入到map中
val restMap: mutable.Map[String,Double] = mutable.Map()
var lastCount = 0L
var str = ""
for(i <- 0 until splits.length){
//1,2
//1,2,3
//1,2,3,4
//1,2,3,4,5
str = str + "," + splits(i)
var count = map.get(str.substring(1))
if (count.get >= 0L) {
if (i != 0 && lastCount != 0L) {
// 计算转换率,保留两位小数
//1-->2-->3 / 1-->2
//1-->2-->3-->4 / 1-->2-->3
//1-->2-->3-->4-->5 / 1-->2-->3-->4
val reat = f"${count.get.toDouble / lastCount}%.2f"
restMap.+=((str.substring(1), reat.toDouble))
}
lastCount = count.get
}
}
/**
 * Computes the step-by-step conversion rate of car traffic along a specified
 * chain of road monitors.
 *
 * Given an ordered road chain such as "1,2,3,4,5", it counts, per prefix
 * ("1,2", "1,2,3", ...), how many times cars drove that exact sequence, then
 * derives the conversion rate of each step as
 * count(prefix of length i) / count(prefix of length i-1), rounded to two decimals.
 */
object MonitorOneStepConvertRateAnalyzer {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local")
      .appName("MonitorOneStepConvertRateAnalyzer")
      // Point the session at the HDFS-backed Hive warehouse that holds
      // traffic.monitor_flow_action.
      .config("fs.defaultFS", "hdfs://hadoop-senior.test.com")
      .config("spark.sql.warehouse.dir", "hdfs://hadoop-senior.test.com:8020/test")
      .enableHiveSupport()
      .getOrCreate()

    // Comma-separated ordered road chain, e.g. "1,2,3,4,5".
    // NOTE(review): this reads args(1) and ignores args(0) — confirm the launcher
    // really passes the road chain as the SECOND argument.
    val roadFlow = args(1)
    val roadFlowBroad = spark.sparkContext.broadcast(roadFlow)

    // One row per car: the car's full trail of road ids in time order, joined by ','.
    val sql1 = "select car,concat_ws(',',collect_list(road_id)) roads from (select car,road_id,action_time from traffic.monitor_flow_action order by action_time)t group by car"
    val rdd1 = spark.sql(sql1).rdd

    // For every car trail, emit (prefix, occurrences) for each prefix of the
    // broadcast road chain: "1", "1,2", "1,2,3", ...
    val rdd2 = rdd1.flatMap(row => {
      val carTrail = row.getString(1)
      val segments = roadFlowBroad.value.split(",")
      val prefixCounts = new ListBuffer[(String, Long)]
      for (i <- 1 to segments.length) {
        // The i-segment prefix of the chain, e.g. i = 2 -> "1,2".
        val prefix = segments.take(i).mkString(",")
        // Count (possibly overlapping) occurrences of the prefix in the trail.
        // NOTE(review): plain substring search can false-match inside longer ids
        // (e.g. "1,2" matches "11,2"); consider matching on segment boundaries.
        var searchFrom = 0
        var count = 0L
        var hit = carTrail.indexOf(prefix, searchFrom)
        while (hit != -1) {
          searchFrom = hit + 1
          count += 1
          hit = carTrail.indexOf(prefix, searchFrom)
        }
        prefixCounts += ((prefix, count))
      }
      prefixCounts
    })

    // Sum each prefix's occurrences over all cars and pull the totals to the driver.
    val rdd3 = rdd2.reduceByKey(_ + _)
    val map = rdd3.collectAsMap()

    // Conversion rate per step, two decimals:
    //   rate("1,2,3") = count("1,2,3") / count("1,2"), etc.
    val splits = roadFlowBroad.value.split(",")
    val restMap: mutable.Map[String, Double] = mutable.Map()
    var lastCount = 0L
    var prefix = ""
    for (i <- 0 until splits.length) {
      prefix = if (i == 0) splits(i) else prefix + "," + splits(i)
      // getOrElse instead of Option.get: a prefix no car ever matched counts as 0
      // (the original crashed with NoSuchElementException on a missing key).
      val count = map.getOrElse(prefix, 0L)
      if (i != 0 && lastCount != 0L) {
        // Fixed: the original built the LITERAL string
        // "count.get.toDouble/lastCount.2f" (no ${...}/% markers) and then
        // threw NumberFormatException on .toDouble.
        val rate = f"${count.toDouble / lastCount}%.2f".toDouble
        restMap += ((prefix, rate))
      }
      lastCount = count
    }
    // TODO(review): restMap is computed but never emitted — persist or print it.

    spark.close()
  }
}