数据:
1001,2020-09-10 10:21:21,home.html
1001,2020-09-10 10:28:10,good_list.html
1002,2020-09-10 09:40:00,home.html
1001,2020-09-10 10:35:05,good_detail.html
1002,2020-09-10 09:42:00,favor.html
1001,2020-09-10 10:42:55,cart.html
1001,2020-09-10 10:43:55,11.html
1001,2020-09-10 10:44:55,22.html
1001,2020-09-10 10:45:55,33.html
1001,2020-09-10 10:46:55,44.html
1001,2020-09-10 10:47:55,55.html
1001,2020-09-10 10:48:55,66.html
1001,2020-09-10 10:49:55,77.html
1002,2020-09-10 09:41:00,mine.html
1001,2020-09-10 11:35:21,home.html
1001,2020-09-10 11:36:10,cart.html
1003,2020-09-10 13:10:00,home.html
1001,2020-09-10 11:38:12,trade.html
1001,2020-09-10 11:39:12,aa.html
1001,2020-09-10 11:40:12,bb.html
1001,2020-09-10 11:41:12,cc.html
1001,2020-09-10 11:42:12,dd.html
1001,2020-09-10 11:43:12,ee.html
1001,2020-09-10 11:44:12,ff.html
1001,2020-09-10 11:45:12,gg.html
1001,2020-09-10 11:46:12,hh.html
1001,2020-09-10 11:47:12,ll.html
1001,2020-09-10 11:38:55,payment.html
1003,2020-09-10 13:15:00,search.html
需求:求得用户每次会话的行为轨迹——并解决数据倾斜问题
import java.text.SimpleDateFormat
import java.util
import java.util.UUID
import org.apache.spark.{Partition, RangePartitioner}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Dataset
object SessionTest2 {
def main(args: Array[String]): Unit = {
//需求:求得用户每次会话的行为轨迹--解决数据倾斜
import org.apache.spark.sql.SparkSession
val spark = SparkSession.builder().master("local[4]").appName("test").getOrCreate()
import spark.implicits._
//1、读取数据
val ds = spark.read.csv("datas/session2.txt").toDF("user_id", "page_time", "page").as[(String, String, String)]
//获取一个集合累加器
val acc = spark.sparkContext.collectionAccumulator[(String, UserAnalysis)]("acc")
//2、转换数据类型--样例类(转成样例类方便修改值)
val ds2: Dataset[(String, UserAnalysis)] = ds.map {
case (userid, timestr, page) =>
//获取时间戳
val formatter = new SimpleDateFormat("