package spark
import breeze.numerics.{acos, cos, sin}
import org.apache.spark.{Partitioner, SparkConf}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession}
import scala.collection.immutable.HashMap
object task11r {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("sparkreadgp")
    val spark = SparkSession.builder().config(conf).getOrCreate()
    val frame1 = spark.read
      .format("jdbc")
      .option("url", "jdbc:postgresql://???/demo")
      .option("user", "???")
      .option("password", "???")
      .option("dbtable", "gcsj") // table name
      .option("driver", "org.postgresql.Driver")
      .load()
    val rdd: RDD[Row] = frame1.rdd // convert the DataFrame to an RDD
    val value1: RDD[(String, (String, String, String))] = rdd.map(row => {
      val strings = row.toString().split(",")
      (strings(1), (strings(2), strings(3), strings(4).split("]")(0)))
    }) // (plate number, (time, longitude, latitude))
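    // Note: Row.toString() renders as "[v0,v1,...]", which is why index 4 strips the
    // trailing "]". This assumes the gcsj table's columns are ordered (id, plate, time,
    // lon, lat); reading fields with row.getString(i) would be more robust than splitting.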
    // Partition the records by plate number
    val keys: Array[String] = value1.map(a => a._1).collect()
    val value3: RDD[(String, (String, String, String))] = value1.partitionBy(new UDFPartitioner(keys))
    // Within each partition, append an anomaly counter and sort by time (ascending)
    val value2: RDD[(String, (String, String, String, Int))] = value3.mapPartitions(iter => {
      iter.map(member => {
        (member._1, (member._2._1, member._2._2, member._2._3, 0))
      }).toList.sortBy(a => a._2._1).toIterator
    }) // (plate number, (time, longitude, latitude, anomaly count))
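    // The next stage re-sorts each partition by time anyway, so this pass mainly
    // serves to attach the anomaly-counter field to each record.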
    // Scan each partition and count speed anomalies between consecutive points
    val value: RDD[(String, Int)] = value2.mapPartitions(iter => {
      val list: List[(String, (String, String, String, Int))] = iter.toList.sortBy(a => a._2._1)
      var i: Int = 0
      for (elem <- 0 until list.size - 1) {
        val a: (String, (String, String, String, Int)) = list(elem)
        val b: (String, (String, String, String, Int)) = list(elem + 1)
        // Check whether a and b are the same vehicle
        if (a._1 == b._1) {
          val date1 = a._2._1.toInt
          val date2 = b._2._1.toInt
          val seconds = date2 - date1
          // Distance is in meters; spreadsheet form of the same formula:
          // =6371004*ACOS(COS(E2)*COS(J2)*COS(D2-I2)+SIN(E2)*SIN(J2))*3.1415926535898/180
          /*
          C = sin(MLatA)*sin(MLatB)*cos(MLonA-MLonB) + cos(MLatA)*cos(MLatB)
          Distance = R*Arccos(C)*Pi/180
          */
          val distance = 6371004 * acos(sin(a._2._3.toFloat) * sin(b._2._3.toFloat) * cos(a._2._2.toFloat - b._2._2.toFloat)
            + cos(a._2._3.toFloat) * cos(b._2._3.toFloat)) * 3.1415926535898 / 180
          val speed = distance / seconds // in m/s
          // Expressway speed limit is 120 km/h; converted to m/s and rounded up, that is 34 m/s
          if (speed > 34.0) {
            i += 1
          }
        }
      }
      // Guard against an empty partition before reading the first record's plate
      if (list.isEmpty) Iterator.empty else Iterator((list.head._1, i))
    })
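    // Note: UDFPartitioner (defined below) gives every plate number its own partition,
    // so all records in a partition share one plate and list.head._1 names the vehicle.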
    // Flag a vehicle as a cloned-plate car when it exceeded the limit more than 3 times
    val value4 = value.filter(a => a._2 > 3).map(a =>
      (a._1, s"Vehicle speed was anomalous ${a._2} times - classifying this vehicle as a cloned-plate car")
    )
    value4.collect().foreach(println)
    spark.stop()
  }
}
// Partitioner class that partitions records by plate number
class UDFPartitioner(args: Array[String]) extends Partitioner {
  private var partitionMap: HashMap[String, Int] = new HashMap[String, Int]()
  private var parId = 0
  // Assign each distinct plate number its own partition id
  for (arg <- args) {
    if (!partitionMap.contains(arg)) {
      partitionMap += (arg -> parId)
      parId += 1
    }
  }
  // Number of partitions (one per distinct plate number)
  override def numPartitions: Int = partitionMap.size
  // Map a key to its partition
  override def getPartition(key: Any): Int =
    partitionMap(key.asInstanceOf[String])
}
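One caveat on the design: UDFPartitioner creates one Spark partition per distinct plate number, which is fine for a small demo table but would produce a huge number of tiny partitions on a real fleet; grouping records per key (for example with groupByKey) and scanning each group would scale better.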
This post walks through a Spark job for processing GPS data: it reads records from a PostgreSQL database, converts them to an RDD, and partitions and sorts them by plate number and time. It then computes the speed between consecutive position points; whenever the speed exceeds the preset threshold (34 m/s, the 120 km/h expressway limit rounded up), the segment is counted as anomalous. Vehicles with more than 3 anomalous segments are identified as cloned-plate cars.
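To sanity-check the speed formula without a PostgreSQL source, here is a minimal local sketch of the same per-plate check. The plate, timestamps, and coordinates are made up, and scala.math stands in for breeze, but the distance expression is the one the job uses.

import scala.math.{acos, cos, sin, Pi}

object SpeedCheckSketch {
  def main(args: Array[String]): Unit = {
    // Made-up (plate, time in seconds, lon, lat) records for one vehicle
    val records = List(
      ("AB1234", 1000, 116.30, 39.90),
      ("AB1234", 1010, 116.31, 39.91),
      ("AB1234", 1020, 117.10, 40.60) // implausibly far for a 10-second gap
    ).sortBy(_._2)
    var anomalies = 0
    for (Seq(a, b) <- records.sliding(2)) {
      val seconds = b._2 - a._2
      // Same great-circle expression as the job above (result in meters)
      val c = sin(a._4) * sin(b._4) * cos(a._3 - b._3) + cos(a._4) * cos(b._4)
      val distance = 6371004 * acos(c) * Pi / 180
      if (distance / seconds > 34.0) anomalies += 1 // 34 m/s ≈ 120 km/h rounded up
    }
    println(s"anomalous segments for AB1234: $anomalies")
  }
}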