package spark
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.SparkConf
import java.util.{Calendar, Date}
import java.text.SimpleDateFormat
import scalikejdbc.{DB, SQL}
import scalikejdbc.config.DBs
object task3 {

  /**
   * Anomaly analysis: find long stretches of time in which a target person
   * produced no pass (checkpoint) records.
   *
   * Pipeline:
   *  1. Read the full pass-record table over JDBC and keep only the target
   *     person's rows, ordered by pass time (unix epoch seconds).
   *  2. Pair each record with its successor (LEAD window); any gap of at
   *     least `ts` seconds is written to the `teee` anomaly table. The last
   *     record (no successor) is written to `last_data`.
   *  3. Re-read `last_data`; if the person has been absent for at least `ts`
   *     seconds and no newer rows exist in `person_pass`, append another
   *     anomaly row, otherwise refresh `last_data` with the newest record.
   *
   * NOTE(review): connection details ("???") are placeholders and must be
   * filled in before running. `DBs.setupAll()` inside foreachPartition runs
   * on the executors; connections are never explicitly closed — TODO confirm
   * the scalikejdbc pool is acceptable to leave to JVM shutdown.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("yichang")
    val spark = SparkSession.builder()
      .config(conf)
      .config("spark.sql.debug.maxToStringFields", "1000")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .getOrCreate()

    // ---- 1. Load the source table and expose it to Spark SQL. ----
    val frame: DataFrame = spark.read
      .format("jdbc")
      .option("url", "jdbc:postgresql://???/demo")
      .option("user", "postgres")
      .option("password", "???")
      .option("dbtable", "???") // source table name
      .option("driver", "org.postgresql.Driver")
      .load()
    frame.createOrReplaceTempView("person_pass_all")

    // Target person and detection window. `val` (never reassigned).
    val select_person = "张三"                        // target person's name
    val select_person_idcard = "12334567886655423"    // target person's id-card number
    val long_sj = 7                                   // window length, in days
    // Long arithmetic so a large window cannot overflow Int.
    val ts: Long = long_sj.toLong * 24 * 60 * 60

    // ---- 2. Target person's records, ordered by pass time. ----
    // FIX: idcard is a string column, so it must be compared to a quoted
    // literal; the original compared it to a bare 17-digit numeric literal,
    // forcing a lossy cast.
    val frame1 = spark.sql(
      s"""
         |select id, name, gender, nation, birth, address, idcard,
         |unix_timestamp(pass_time) as pass_time_sjc,
         |area_name
         |from person_pass_all
         |where name = '$select_person'
         |and idcard = '$select_person_idcard'
         |order by pass_time_sjc
         |""".stripMargin)
    frame1.createOrReplaceTempView("target_ordered_data")

    // Attach each record's successor via LEAD. The defaults (0 / "0") mark
    // the final record, which has no successor.
    spark.sql(
      s"""
         |select id, name, gender, nation, birth, address, idcard,
         |pass_time_sjc, area_name,
         |LEAD(pass_time_sjc, 1, 0) over
         |  (partition by idcard order by pass_time_sjc asc) as next_pass_time_sjc,
         |LEAD(area_name, 1, 0) over
         |  (partition by idcard order by pass_time_sjc asc) as next_area_name
         |from target_ordered_data
         |""".stripMargin)
      .rdd
      .map { row =>
        (row.getAs[String]("name"),
          row.getAs[String]("idcard"),
          row.getAs[Long]("pass_time_sjc"),
          row.getAs[String]("area_name"),
          row.getAs[Long]("next_pass_time_sjc"),
          row.getAs[String]("next_area_name"))
      }
      .sortBy(_._3)
      .foreachPartition { iter =>
        // Executor-side: initialise scalikejdbc pools from application.conf.
        DBs.setupAll()
        // One formatter per partition (SimpleDateFormat is not thread-safe,
        // but a partition is processed by a single thread).
        // FIX: "SSS" = milliseconds; the original "sss" repeated the seconds
        // field, producing wrong fractional seconds in every timestamp.
        val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")
        iter.foreach { case (name, idcard, passSjc, areaName, nextSjc, nextAreaName) =>
          val firstTime = sdf.format(new Date(passSjc * 1000L))
          val nextTime = sdf.format(new Date(nextSjc * 1000L))
          val gapSeconds = nextSjc - passSjc
          val gapDays = gapSeconds / 60 / 60 / 24 // whole days (integer division)
          if (nextSjc != 0 && nextAreaName != "0") {
            // Interior record: report the gap to its successor if too large.
            if (gapSeconds >= ts) {
              DB.localTx { implicit session =>
                SQL("insert into teee(name,idcard,sjc,shuoming) values(?,?,?,?)")
                  .bind(name, idcard, s"${gapDays}天",
                    s"${firstTime}到${nextTime}这段时间内没有通行数据")
                  .update().apply()
              }
            }
          } else {
            // Final record (no successor): remember it for the live check.
            DB.autoCommit { implicit session =>
              SQL("insert into last_data(name,idcard,last_pass_time,sjc,last_area_name) values(?,?,?,?,?)")
                .bind(name, idcard, firstTime, passSjc, areaName)
                .update().apply()
            }
          }
        }
      }

    // ---- 3. Live check: has the person been absent since their last pass? ----
    val frame3 = spark.read.format("jdbc")
      .option("url", "jdbc:postgresql://????/demo")
      .option("user", "???")
      .option("password", "???")
      .option("dbtable", "last_data")
      .option("driver", "org.postgresql.Driver")
      .load()

    frame3.rdd
      .filter(row => row.getAs[String]("idcard") == select_person_idcard)
      .foreachPartition { iter =>
        DBs.setupAll()
        val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")
        iter.foreach { row =>
          val name = row.getAs[String]("name")
          val idcard = row.getAs[String]("idcard")
          val lastSjc = row.getAs[String]("sjc").toLong
          val nowSjc = System.currentTimeMillis() / 1000L
          val absentSeconds = nowSjc - lastSjc
          // Whole days — consistent with the integer-day arithmetic above
          // (the original mixed Double division here with Long division there).
          val absentDays = absentSeconds / 60 / 60 / 24
          val lastTime = sdf.format(new Date(lastSjc * 1000L))
          val nowTime = sdf.format(new Date(nowSjc * 1000L))
          if (absentSeconds >= ts) {
            // Any records newer than the stored last pass time?
            // FIX: bound parameters instead of string-interpolated SQL,
            // consistent with the parameterized inserts above.
            val newer = DB.readOnly { implicit session =>
              SQL("""select * from person_pass
                    |where pass_time > ?::timestamp and pass_time <= ?::timestamp
                    |and name = ? and idcard = ?
                    |order by pass_time""".stripMargin)
                .bind(lastTime, nowTime, name, idcard)
                .map(_.toMap()).list().apply()
            }
            if (newer.isEmpty) {
              // Still absent over the whole window: record the anomaly.
              DB.localTx { implicit session =>
                SQL("insert into teee(name,idcard,sjc,shuoming) values(?,?,?,?)")
                  .bind(name, idcard, s"${absentDays}天",
                    s"${lastTime}到${nowTime}这段时间内没有通行数据")
                  .update().apply()
              }
            } else {
              // Newer records exist: refresh last_data with the newest one.
              val newest = newer.last
              val newestPass = newest("pass_time").toString
              DB.autoCommit { implicit session =>
                SQL("""update last_data set
                      |last_area_name = ?,
                      |last_pass_time = ?,
                      |sjc = extract(epoch from ?::timestamp)
                      |where name = ? and idcard = ?""".stripMargin)
                  .bind(newest("area_name"), newestPass, newestPass, name, idcard)
                  .update().apply()
              }
            }
          }
        }
      }

    spark.stop()
  }
}
// spark-长时间未出现人的探索 (spark — detecting people unseen for a long time; blog title footer)
// 于 2022-06-17 18:39:31 首次发布 (first published 2022-06-17 18:39:31)