spark-套牌车

该博客展示了一个使用Spark处理GPS数据的示例,通过读取PostgreSQL数据库中的数据,将其转换为RDD,然后按车牌号分区并排序。接着,计算相邻位置点之间的速度,如果超过预设阈值(34m/s,相当于高速限速),则标记为异常,并统计异常次数。最终,将异常速度次数大于3的车辆识别为套牌车。
摘要由CSDN通过智能技术生成
package spark
import breeze.numerics.{acos, cos, sin}
import org.apache.spark.{Partitioner, SparkConf}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession}
import scala.collection.immutable.HashMap

object task11r {
  /**
   * Detects cloned-plate ("套牌") vehicles from GPS records stored in PostgreSQL.
   *
   * Pipeline: load table -> (plate, (time, lon, lat)) pairs -> one partition per
   * plate -> sort each partition by time -> count consecutive point pairs whose
   * implied speed exceeds the motorway limit (120 km/h ≈ 34 m/s) -> report plates
   * with more than 3 such impossible jumps.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("sparkreadgp")
    val spark = SparkSession.builder().config(conf).getOrCreate()

    // Load the GPS-record table from PostgreSQL as a DataFrame.
    val frame1 = spark.read
      .format("jdbc")
      .option("url", "jdbc:postgresql://???/demo")
      .option("user", "???")
      .option("password", "???")
      .option("dbtable", "gcsj") // table name
      .option("driver", "org.postgresql.Driver")
      .load()

    // (plate, (time, longitude, latitude)).
    // NOTE(review): parsing Row.toString with split(",") is fragile — it breaks if
    // any field contains a comma; prefer row.getString(i). Kept to match the
    // original table layout (col 1 = plate, 2 = time, 3 = lon, 4 = lat).
    val value1: RDD[(String, (String, String, String))] = frame1.rdd.map { row =>
      val fields = row.toString().split(",")
      (fields(1), (fields(2), fields(3), fields(4).split("]")(0)))
    }

    // One partition per distinct plate, so each partition holds exactly one vehicle.
    // distinct() before collect(): the original shipped every row's key to the driver.
    val plates: Array[String] = value1.keys.distinct().collect()
    val partitioned: RDD[(String, (String, String, String))] =
      value1.partitionBy(new UDFPartitioner(plates))

    // Per partition (= per vehicle): sort by timestamp once (the original sorted
    // twice), then count consecutive pairs with an impossible speed.
    val anomalies: RDD[(String, Int)] = partitioned.mapPartitions { iter =>
      val records = iter.toList.sortBy(_._2._1)
      if (records.isEmpty) Iterator.empty // guard: original crashed on list(0)
      else {
        var overLimit = 0
        for (idx <- 0 until records.size - 1) {
          val a = records(idx)
          val b = records(idx + 1)
          if (a._1 == b._1) { // same vehicle (always true inside a per-plate partition)
            val seconds = b._2._1.toInt - a._2._1.toInt
            // Spherical law of cosines, with degrees converted to radians.
            // The original fed raw degrees to sin/cos, scaled the acos result by
            // pi/180, and misplaced cos(Δλ) — all three fixed here:
            //   C = sin(lat1)*sin(lat2) + cos(lat1)*cos(lat2)*cos(lon1 - lon2)
            //   d = R * acos(C),  R = 6371004 m
            val lat1 = math.toRadians(a._2._3.toDouble)
            val lat2 = math.toRadians(b._2._3.toDouble)
            val dLon = math.toRadians(a._2._2.toDouble - b._2._2.toDouble)
            // Clamp into [-1, 1] before acos to absorb floating-point drift.
            val c = math.max(-1.0,
              math.min(1.0, math.sin(lat1) * math.sin(lat2) + math.cos(lat1) * math.cos(lat2) * math.cos(dLon)))
            val meters = 6371004.0 * math.acos(c)
            // seconds > 0 guards the division for duplicate timestamps.
            if (seconds > 0 && meters / seconds > 34.0) overLimit += 1 // 120 km/h ≈ 34 m/s
          }
        }
        Iterator((records.head._1, overLimit))
      }
    }

    // More than 3 impossible jumps -> flagged as a cloned plate.
    // filter-then-map keeps the RDD well-typed; the original mapped non-matching
    // rows to Unit, yielding an RDD[Any] that then needed `a != ()`.
    anomalies
      .filter(_._2 > 3)
      .map { case (plate, count) => (plate, s"车辆速度发生异常次数为${count}-将此车辆归为套牌车") }
      .collect()
      .foreach(println)

    spark.stop()
  }
}
//定义根据车牌号进行分区的类
/**
 * Spark partitioner that assigns one partition per distinct plate number.
 *
 * @param args plate numbers; duplicates are ignored, first-occurrence order
 *             determines the partition index (same mapping as the original
 *             var-and-loop construction).
 */
class UDFPartitioner(args: Array[String]) extends Partitioner {
  // Immutable plate -> partition-index map, built in one expression instead of
  // the original mutable HashMap + manual counter loop.
  private val partitionMap: Map[String, Int] = args.distinct.zipWithIndex.toMap

  // Retained for interface compatibility with the original public `var parId`,
  // whose final value was the number of distinct keys.
  var parId: Int = partitionMap.size

  /** Number of partitions = number of distinct plates. */
  override def numPartitions: Int = partitionMap.size // size, not valuesIterator.length

  /**
   * Partition index for a plate. Throws NoSuchElementException for an unknown
   * key, matching the original `partitionMap(sub)` behavior.
   */
  override def getPartition(key: Any): Int =
    partitionMap(key.asInstanceOf[String])
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值