package com.wz.udf
import java.util.Date
import org.apache.commons.lang3.time.FastDateFormat
import org.apache.spark.sql.SparkSession
//登录时间 登录账号
//10:31:11 aa
//10:31:20 aa
//10:31:33 aa
//10:32:10 aa
//10:37:11 aa
//10:36:20 aa
//10:35:33 aa
//10:32:53 aa
//
//10:31:11 bb
//10:31:20 cc
//10:31:33 bb
//10:32:10 dd
//10:37:11 dd
//10:36:20 cc
//10:35:33 cc
//10:32:53 cc
//
//需求:统计每个用户一小时内最大的登录数
//Approach:
//1. Convert each login time to milliseconds
//2. 0-3600 (seconds) is one hour, i.e. a 0-3600000 ms window
/*val sql =
"""
| select temp.name,max(temp.cn) from (
|
| select a.name, a.time , count(1) cn
| from logintable a inner join logintable b
| on a.name = b.name
| and formatTime2ms(a.time)- formatTime2ms(b.time)<=3600000
| and formatTime2ms(a.time)- formatTime2ms(b.time)>=0
| group by a.time,a.name
|
| )temp
| group by temp.name
""".stripMargin*/
/*+----+-------+
|name|max(cn)|
+----+-------+
| cc| 2|
| bb| 2|
| dd| 2|
| aa| 8|
+----+-------+*/
/**
 * Demo job: for every user, computes the maximum number of logins that fall
 * within a one-hour window ending at one of that user's logins.
 *
 * The sample logins are registered as the temp view `logintable` (columns
 * `time`, `name`) and queried through Spark SQL using the `formatTime2ms` UDF.
 */
object demo {

  // Shared "HH:mm:ss" parser, created once instead of on every UDF call.
  // FastDateFormat is used rather than SimpleDateFormat because
  // SimpleDateFormat is not thread-safe and can mis-parse under concurrent use.
  private val timeFormat: FastDateFormat = FastDateFormat.getInstance("HH:mm:ss")

  /**
   * Local Spark session used by the demo.
   *
   * Lazy so that merely initializing this object (for example when the UDF
   * closure touches `demo`) does not start a Spark session as a side effect.
   */
  lazy val spark: SparkSession = SparkSession.builder()
    .appName("homework02")
    .master("local[6]")
    .getOrCreate()

  /** Sample login records as `(time in "HH:mm:ss", account name)`. */
  val seq_login: Seq[(String, String)] = Seq(
    ("10:31:11", "aa"),
    ("10:31:20", "aa"),
    ("10:31:33", "aa"),
    ("10:32:10", "aa"),
    ("10:37:11", "aa"),
    ("10:36:20", "aa"),
    ("10:35:33", "aa"),
    ("10:32:53", "aa"),
    ("12:32:53", "aa"),
    ("12:34:53", "aa"),
    ("10:31:11", "bb"),
    ("10:31:20", "cc"),
    ("10:31:33", "bb"),
    ("10:32:10", "dd"),
    ("10:37:11", "dd"),
    ("10:36:20", "cc")
  )

  /**
   * Registers the sample data and the UDF, runs the query and prints the
   * result. The Spark session is stopped when the job finishes or fails.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    import spark.implicits._

    try {
      spark.sparkContext.setLogLevel("ERROR")

      // Expose the sample logins to SQL as the `logintable` view.
      spark.sparkContext
        .parallelize(seq_login)
        .toDF("time", "name")
        .createOrReplaceTempView("logintable")

      // Register the UDF so the SQL below can call formatTime2ms(...).
      spark.udf.register("formatTime2ms", formatTime2ms _)

      // Inner query: self-join each login `a` with every login `b` of the same
      // user that happened at most one hour (3600000 ms) before it, and count
      // the matches per (user, time). Outer query: keep the largest count per user.
      val sql =
        """
          | select temp.name,max(temp.cn) from (
          |
          | select a.name, a.time , count(1) cn
          | from logintable a inner join logintable b
          | on a.name = b.name
          | and formatTime2ms(a.time)- formatTime2ms(b.time)<=3600000
          | and formatTime2ms(a.time)- formatTime2ms(b.time)>=0
          | group by a.time,a.name
          |
          | )temp
          | group by temp.name
          |""".stripMargin

      spark.sql(sql).show()
    } finally {
      // Release the local Spark resources even if the query fails.
      spark.stop()
    }
  }

  /**
   * Converts a time in "HH:mm:ss" format to a millisecond value.
   *
   * The result is the `getTime` of the parsed `java.util.Date`; the query only
   * relies on differences between two such values.
   *
   * @param time time string in "HH:mm:ss" format
   * @return millisecond value of the parsed time
   * @throws java.text.ParseException if `time` does not match "HH:mm:ss"
   */
  def formatTime2ms(time: String): Long =
    timeFormat.parse(time).getTime
}
// Maximum number of logins per user within one hour
// (blog page footer: latest recommended article published on 2022-11-21 23:12:32)