按卡口分组,获取不同车速类型通过卡口的车辆数
val sql = "select * from traffic.monitor_flow_action"
val df = spark.sql(sql)
implicit val monitorFlowActionEncoder: Encoder[MonitorFlowAction] = ExpressionEncoder()
implicit val tupleEncoder: Encoder[Tuple2[String, MonitorFlowAction]] = ExpressionEncoder()
implicit val stringEncoder: Encoder[String] = ExpressionEncoder()
implicit val speedSortKeyEncoder: Encoder[SpeedSortKey] = ExpressionEncoder()
implicit val stringAndSpeedSortKeyEncoder: Encoder[Tuple2[String, SpeedSortKey]] = ExpressionEncoder()
val ds = df.as[MonitorFlowAction].map(action => (action.monitor_id, action))
ds.cache()
val ds1 = ds.groupByKey(tuple => tuple._1)
.mapGroups((i,x) => {
val monitorId = i
var lowSpeed = 0; //低速通过的车辆数
var normalSpeed = 0; //正常通过的车辆数
var mediumSpeed = 0; //中速通过的车辆数
var highSpeed = 0; //高速通过的车辆数
// 遍历速度,判断并统计四个值
while (x.hasNext) {
val speed = x.next()._2.speed.toInt
if (speed >= 0 && speed < 60) {
lowSpeed += 1
} else if (speed >= 60 && speed < 90) {
normalSpeed += 1
} else if (speed >= 90 && speed < 120) {
mediumSpeed += 1
} else if (speed >= 120) {
highSpeed += 1
}
}
(monitorId, SpeedSortKey(lowSpeed, normalSpeed, mediumSpeed, highSpeed))
})
按车速类型排序,获取top5车辆高速通过的卡口
import spark.implicits._
val arr = ds1.sort($"_2".desc).take(5)
val list = arr.map(item => item._1)
val monitorids = list.map(item => HighSpeed(task_id, item))
val df1 = spark.createDataFrame(monitorids)
df1.write.format("jdbc")
.option("driver", "com.mysql.jdbc.Driver")
.option("url", "jdbc:mysql://hadoop-senior.test.com:3306/traffic1")
.option("dbtable", "top5_monitor")
.option("user", "root")
.option("password","123456")
.mode(SaveMode.Append)
.save()
获取top5卡口对应的监控记录
//将top5卡口生成广播变量
val broads = spark.sparkContext.broadcast(list)
//从原始的监控数据中过滤出top5卡口数据
val ds2 = ds.filter(x => {
val monitorId = x._1
val aaa = broads.value
aaa.contains(monitorId)
})
ds2.map(_._2).createOrReplaceTempView("top10_speed_tmp")
获取top5卡口中每个卡口top10高速通过的车辆记录
val df2 = spark.sql("select " + args(0) + " as task_id, date,monitor_id,camera_id,car,action_time,speed,road_id,area_id from (select *, row_number() over(partition by monitor_id order by speed desc) rank from top10_speed_tmp) t where t.rank<=10")
df2.write.format("jdbc")
.option("driver", "com.mysql.jdbc.Driver")
.option("url", "jdbc:mysql://hadoop-senior.test.com:3306/traffic1")
.option("dbtable", "top10_speed_detail")
.option("user", "root")
.option("password","123456")
.mode(SaveMode.Append)
.save()
使用sql方式
select *,
if(speed>120,1,0) as highFlag,
if(speed<60,1,0) as lowerFlag,
#case when speed>120 then 1 else 0 end as highFlag,
#case when speed<60 then 1 else 0 end as lowerFlag,
from monitor_flow_action;
tmp1:
*,highFlag:1,middelFlag:0,normalFlag:0,lowerFlag:0
*,highFlag:0,middelFlag:1,normalFlag:0,lowerFlag:0
============================================
select monitor_id,sum(highFlag) highCnt,sum(middelFlag) middelCnt,....
group by monitor_id
from tmp1
order by highCnt desc,middelCnt desc,...
limit 5
object GetTopNSpeed {
System.setProperty("hadoop.home.dir","d://software/hadoop-2.9.2")
def main(args: Array[String]): Unit = {
val spark = SparkSession.builder()
.master("local")
.appName("GetTopNSpeed")
.config("fs.defaultFS", "hdfs://hadoop-senior.test.com")
.config("spark.sql.warehouse.dir", "hdfs://hadoop-senior.test.com:8020/test")
.enableHiveSupport()
.getOrCreate()
val sql = "select * from traffic.monitor_flow_action"
val df = spark.sql(sql)
implicit val monitorFlowActionEncoder: Encoder[MonitorFlowAction] = ExpressionEncoder()
implicit val tupleEncoder: Encoder[Tuple2[String, MonitorFlowAction]] = ExpressionEncoder()
implicit val stringEncoder: Encoder[String] = ExpressionEncoder()
implicit val speedSortKeyEncoder: Encoder[SpeedSortKey] = ExpressionEncoder()
implicit val stringAndSpeedSortKeyEncoder: Encoder[Tuple2[String, SpeedSortKey]] = ExpressionEncoder()
val ds = df.as[MonitorFlowAction].map(action => (action.monitor_id, action))
ds.cache()
val ds1 = ds.groupByKey(tuple => tuple._1)
.mapGroups((i,x) => {
val monitorId = i
var lowSpeed = 0; //低速通过的车辆数
var normalSpeed = 0; //正常通过的车辆数
var mediumSpeed = 0; //中速通过的车辆数
var highSpeed = 0; //高速通过的车辆数
// 遍历速度,判断并统计四个值
while (x.hasNext) {
val speed = x.next()._2.speed.toInt
if (speed >= 0 && speed < 60) {
lowSpeed += 1
} else if (speed >= 60 && speed < 90) {
normalSpeed += 1
} else if (speed >= 90 && speed < 120) {
mediumSpeed += 1
} else if (speed >= 120) {
highSpeed += 1
}
}
(monitorId, SpeedSortKey(lowSpeed, normalSpeed, mediumSpeed, highSpeed))
})
import spark.implicits._
val arr = ds1.sort($"_2".desc).take(5)
val list = arr.map(item => item._1)
val monitorids = list.map(item => HighSpeed(args(0), item))
val df1 = spark.createDataFrame(monitorids)
df1.write.format("jdbc")
.option("driver", "com.mysql.jdbc.Driver")
.option("url", "jdbc:mysql://hadoop-senior.test.com:3306/traffic1")
.option("dbtable", "top5_monitor")
.option("user", "root")
.option("password","123456")
.mode(SaveMode.Append)
.save()
val broads = spark.sparkContext.broadcast(list)
val ds2 = ds.filter(x => {
val monitorId = x._1
val aaa = broads.value
aaa.contains(monitorId)
})
ds2.map(_._2).createOrReplaceTempView("top10_speed_tmp")
val df2 = spark.sql("select " + args(0) + " as task_id, date,monitor_id,camera_id,car,action_time,speed,road_id,area_id from (select *, row_number() over(partition by monitor_id order by speed desc) rank from top10_speed_tmp) t where t.rank<=10")
df2.write.format("jdbc")
.option("driver", "com.mysql.jdbc.Driver")
.option("url", "jdbc:mysql://hadoop-senior.test.com:3306/traffic1")
.option("dbtable", "top10_speed_detail")
.option("user", "root")
.option("password","123456")
.mode(SaveMode.Append)
.save()
spark.close()
}
}
//自定义排序
case class SpeedSortKey(lowSpeed:Int, normalSpeed:Int, mediumSpeed:Int, highSpeed:Int) extends Ordered[SpeedSortKey] with Serializable{
override def compare(that:SpeedSortKey):Int = {
if(this.highSpeed - that.highSpeed != 0){
return this.highSpeed - that.highSpeed
}else if (this.mediumSpeed - that.mediumSpeed != 0) {
return this.mediumSpeed - that.mediumSpeed
}else if (this.normalSpeed - that.normalSpeed != 0) {
return this.normalSpeed - that.normalSpeed
}else if (this.lowSpeed - that.lowSpeed != 0) {
return this.lowSpeed - that.lowSpeed
}
0
}
override def toString:String = "SpeedSortKey [lowSpeed=" + lowSpeed + ", normalSpeed=" + normalSpeed + ", mediumSpeed=" + mediumSpeed + ", highSpeed=" + highSpeed + "]"
}
case class HighSpeed(task_id: String, monitor_id: String)
case class MonitorFlowAction(date:String, monitor_id:String, camera_id:String,car:String,action_time:String,speed:String,road_id:String,area_id:String)