案例一:连续活跃用户案例
#SQL风格
import org.apache.spark.sql.catalyst.plans.logical.Window
import org.apache.spark.sql.{DataFrame, SparkSession}
//连续活跃用户案例
//求连续登录天数大于或等于两天的用户记录
// Continuous-active-user case study (SQL style).
// Requirement (per file header): find users with >= 2 consecutive login days,
// reporting the streak's start date, end date, and length.
object ContenueActiveUser_SQL {
  def main(args: Array[String]): Unit = {
    // Local session; appName was empty in the original, which makes the job
    // anonymous in the Spark UI / history server.
    val session: SparkSession = SparkSession
      .builder()
      .master("local[*]")
      .appName("ContenueActiveUser_SQL")
      .getOrCreate()

    // Despite the .txt extension the file is read as CSV with a header row.
    // Expected columns: uid (user id), dt (login date string) — confirm against data1.txt.
    val df: DataFrame = session
      .read
      .option("header", "true")
      .csv("Files/data1.txt")
    df.createTempView("view_log")

    /**
     * SQL-style solution using the classic "date minus row_number" trick:
     *  t1: deduplicate (uid, dt) pairs — a user may log in several times a day.
     *  t2: number each user's distinct login dates in ascending order.
     *  t3: date_sub(dt, dt_num) is constant within one unbroken run of
     *      consecutive days, so it serves as a streak group key (date_diff).
     *  outer: per (uid, date_diff) streak, emit first day, last day and length,
     *         keeping streaks of at least 2 days (matches the stated requirement;
     *         the original used >= 3, contradicting its own header comment).
     */
    val df2: DataFrame = session.sql(
      """
        |select
        |  uid,
        |  min(dt)         as min_dt,
        |  max(dt)         as max_dt,
        |  count(date_diff) as times
        |from
        |(
        |  select
        |    uid,
        |    dt,
        |    date_sub(dt, dt_num) as date_diff
        |  from
        |  (
        |    select
        |      uid,
        |      dt,
        |      row_number() over(partition by uid order by dt asc) as dt_num
        |    from
        |    (
        |      select distinct uid, dt
        |      from view_log
        |    ) t1
        |  ) t2
        |) t3
        |group by uid, date_diff having times >= 2
        |""".stripMargin)

    // Materialize and print the result, then release the session (the original
    // left both commented out, so the job did nothing and leaked the session).
    df2.show()
    session.stop()
  }
}
#DSL风格
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.{DataFrame, SparkSessi