spark-长时间未出现人的探索

package spark
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.SparkConf
import java.util.{Calendar, Date}
import java.text.SimpleDateFormat
import scalikejdbc.{DB, SQL}
import scalikejdbc.config.DBs
object task3 {
  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setMaster("local[*]").setAppName("yichang")
    val spark=SparkSession.builder()
              .config(conf)
              .config("spark.sql.debug.maxToStringFields", "1000")
              .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
              .getOrCreate()
    val frame: DataFrame = spark.read
                          .format("jdbc")
                          .option("url", "jdbc:postgresql://???/demo")
                          .option("user", "postgres")
                          .option("password", "???")
                          .option("dbtable", "???") //表名
                          .option("driver", "org.postgresql.Driver")
                          .load()
    frame.createOrReplaceTempView("person_pass_all")

    var select_person= "张三"  //选择目标人员
    var select_person_idcard= "12334567886655423"  //选择目标人员身份证号
    var long_sj=7  //设置时间长度,比如一天,七天,以天为单位
    var ts=long_sj*24*60*60
    /**
     * 关注目标人员长时间
     * 无轨迹的异常分析
     */
    val frame1 = spark.sql(
      s"""
         |select id,
         |name,
         |gender,
         |nation,
         |birth,
         |address,
         |idcard,
         |unix_timestamp(pass_time) as pass_time_sjc,
         |area_name
         |from person_pass_all
         |where name='$select_person'
         |and idcard=$select_person_idcard
         |order by pass_time_sjc
         |""".stripMargin)

    frame1.createOrReplaceTempView("target_ordered_data")

    spark.sql(
      s"""
         |select id,name,gender,nation,birth,
         |address,
         |idcard,
         |pass_time_sjc,
         |area_name,
         |LEAD(target_ordered_data.pass_time_sjc,1,0) over(PARTITION BY target_ordered_data.idcard ORDER BY target_ordered_data.pass_time_sjc asc) as next_pass_time_sjc,
         |LEAD(target_ordered_data.area_name,1,0) over(PARTITION BY target_ordered_data.idcard ORDER BY target_ordered_data.pass_time_sjc asc) as next_area_name
         |from target_ordered_data
         |""".stripMargin).rdd.map(row => {
            val name = row.getAs[String]("name")
            val idcard = row.getAs[String]("idcard")
            val pass_time_sjc = row.getAs[Long]("pass_time_sjc")
            val area_name = row.getAs[String]("area_name")
            val next_pass_time_sjc=row.getAs[Long]("next_pass_time_sjc")
            val next_area_name = row.getAs[String]("next_area_name")
            (name, idcard, pass_time_sjc, area_name,next_pass_time_sjc,next_area_name)
        }).sortBy(_._3).foreachPartition(iter=> {
            DBs.setupAll()
            val list: List[(String, String, Long, String, Long, String)] = iter.toList
            list.foreach { a => {
              val name = a._1
              val idcard = a._2
              val first_pass_time_sjc = a._3
              val next_pass_time_sjc = a._5
              val first_area_name = a._4
              val next_area_name = a._6
              val sdf: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.sss")
              val first_t1: String = sdf.format(new Date(first_pass_time_sjc * 1000L))
              val next_t2: String = sdf.format(new Date(next_pass_time_sjc * 1000L))
              val first_next_sjc = next_pass_time_sjc - first_pass_time_sjc
              val first_next_sjc_transform = first_next_sjc / 60 / 60 / 24
              if ((next_pass_time_sjc != 0) && (next_area_name != 0.toString)) {
                if (first_next_sjc >= ts) {
                  DB.localTx(implicit session => {
                    SQL("insert into teee(name,idcard,sjc,shuoming) values(?,?,?,?)").bind(name, idcard, s"${first_next_sjc_transform}天", s"${first_t1}${next_t2}这段时间内没有通行数据").update().apply()
                  })
                }
              }
              else {
                  DB.autoCommit(implicit session => {
                    SQL("insert into last_data(name,idcard,last_pass_time,sjc,last_area_name) values(?,?,?,?,?)").bind(name, idcard,first_t1,first_pass_time_sjc, first_area_name).update().apply()
                  })
                }
              }
            }
           })

    val frame3=spark.read.format("jdbc")
      .option("url", "jdbc:postgresql://????/demo")
      .option("user", "???")
      .option("password", "???")
      .option("dbtable", "last_data") //表名
      .option("driver", "org.postgresql.Driver")
      .load()

    frame3.rdd.filter(row=>row.getAs[String]("idcard")==select_person_idcard).foreachPartition(iter=>{
      DBs.setupAll()
      iter.foreach(row=>{
        val name=row.getAs[String]("name")
        val idcard=row.getAs[String]("idcard")
        val last_pass_time=row.getAs[String]("last_pass_time")
        val last_pass_time_sjc=row.getAs[String]("sjc")
        val last_pass_time_sjc_long=last_pass_time_sjc.toLong
        val last_area_name=row.getAs[String]("last_area_name")
        val cal = Calendar.getInstance
        val current_time=cal.getTimeInMillis/1000.0
        val gen_xin_shijiancha=current_time - last_pass_time_sjc_long
        val sdf1: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.sss")
        val first_t11: String = sdf1.format(new Date(last_pass_time_sjc_long * 1000L))
        val current_t11: String = sdf1.format(new Date(current_time.toLong * 1000L))
        val gen_xin_shijiancha_transform = gen_xin_shijiancha / 60 / 60 / 24
        if(gen_xin_shijiancha>=ts) {

          val list = DB.readOnly { implicit session =>
           SQL(s"select * from person_pass where pass_time > '$first_t11' and pass_time <= '$current_t11' and name='$name' and idcard='$idcard' order by pass_time")
             .map(_.toMap()).list().apply()
          }
          if (list.length == 0) {
           //更新teee库,表示增加未出现数据
            DB.localTx(implicit session => {
              SQL("insert into teee(name,idcard,sjc,shuoming) values(?,?,?,?)").bind(name, idcard, s"${gen_xin_shijiancha_transform}天", s"${first_t11}${current_t11}这段时间内没有通行数据").update().apply()
            })

          } else {
            val maps: List[Map[String, Any]] = list.takeRight(1)
            val new_last=maps(0)
            DB.autoCommit(implicit session=>{
              SQL(
                s"""
                   |update last_data set
                   |last_area_name='${new_last("area_name")}',
                   |last_pass_time='${new_last("pass_time")}',
                   |sjc=extract(epoch from timestamp '${new_last("pass_time")}')
                   |where name = '$name' and
                   |idcard = '$idcard'
                   |""".stripMargin).update().apply()
            })

         }
       }
      })
    })
    spark.stop()
  }
}
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值