一、数据结构
id date visitCount（字段以空格分隔，见第四节示例数据）
二、需求
![在这里插入图片描述](https://img-blog.csdnimg.cn/fe3372b414e14613b78d776be9485af2.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDA3OTYzNg==,size_16,color_FFFFFF,t_70)
三、实现
import org.apache.spark.sql.{Dataset, SparkSession}
// One raw visit record parsed from a line of user.txt:
// user id, visit date as a string like "2017/1/21", and the visit count.
case class User(id:String,date:String,vc:Long)
/**
 * Homework exercise: per user and per month, compute the monthly visit total
 * and a running (cumulative) visit total, using Spark SQL window functions.
 *
 * Input file lines look like: "u01 2017/1/21 5" (whitespace-separated).
 */
object Test0 {
  def main(args: Array[String]): Unit = {
    // Local Spark session for this exercise.
    val spark: SparkSession = SparkSession.builder()
      .master("local[*]").appName("homework").getOrCreate()

    val users: Dataset[String] = spark.read.textFile("f://data//user.txt")
    import spark.implicits._

    // Parse raw lines into User records. split("\\s+") tolerates runs of
    // whitespace; the original split(" ") produced empty tokens on double
    // spaces and then failed on arr(2).toLong.
    val ds: Dataset[User] = users.map(v => {
      val arr: Array[String] = v.split("\\s+")
      User(arr(0), arr(1), arr(2).toLong) // case class apply; `new` is unnecessary
    })
    ds.createTempView("t_user")
    spark.sql("select * from t_user").show()

    // Monthly totals plus a cumulative total per user.
    // * 'yyyy/M/d' (not 'dd') also parses single-digit days such as 2017/1/5.
    // * 'yyyy-MM' zero-pads the month so the string `order by date` in the
    //   window stays chronological ('2017-2' would otherwise sort AFTER
    //   '2017-10' lexicographically).
    // * The subquery needs no order by: the window clause orders its own
    //   partitions, and the outer result order is not guaranteed anyway.
    val sql2 =
      """
        |select t2.*, sum(x1) over(partition by id order by date) x2
        |from (
        |  select id, date, sum(vc) x1
        |  from (select id, date_format(to_date(date,'yyyy/M/d'),'yyyy-MM') date, vc
        |        from t_user) t1
        |  group by id, date
        |) t2
        |""".stripMargin
    spark.sql(sql2).show()

    spark.stop() // release the local executor resources
  }
}
![在这里插入图片描述](https://img-blog.csdnimg.cn/3782c2b01f114dceb476b9fbece25bee.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDA3OTYzNg==,size_16,color_FFFFFF,t_70)
四、数据
u01 2017/1/21 5
u02 2017/1/23 6
u03 2017/1/22 8
u04 2017/1/20 3
u01 2017/1/23 6
u01 2017/2/21 8
u02 2017/1/23 6
u01 2017/2/22 4