package demo
import java.text.SimpleDateFormat
import java.util.ArrayList
import java.util.Calendar
import scala.collection.JavaConversions._
import org.apache.spark.sql.{
DataFrame, SparkSession}
import scala.collection.mutable.ArrayBuffer
import scala.util.control.Breaks
/**
* Demo driver: builds a small set of historical paths and a set of newly
* observed paths, then runs the duplicate check (isInHistPath) on them.
* NOTE(review): the object body is cut off in the visible part of the file;
* only what is visible is documented here.
*/
object CompareFlowDemo {
var activateRepeatPath:Set[Int] = Set() // set of duplicated paths (presumably flow_id values — confirm where it is populated)
/**
* Entry point. Starts a local SparkSession, derives the date strings for
* today and for three days ago, builds the sample data and prints the
* result of isInHistPath.
* @param args command-line arguments; not read in the visible portion
*/
def main(args:Array[String]) {
val conf = SparkSession
.builder()
.appName("spark dataframe test").master("local")
// Despite its name, conf holds the SparkSession builder; getOrCreate() yields the session.
val spark = conf.getOrCreate()
/**
* Get the current date (formatted as yyyyMMdd)
*/
var dateFormat:SimpleDateFormat = new SimpleDateFormat("yyyyMMdd")
var cal1:Calendar=Calendar.getInstance()
var thisDay=dateFormat.format(cal1.getTime())
// NOTE(review): format() is called with no arguments, so nowDay looks identical to thisDay — confirm this is intentional.
val nowDay : String = thisDay.format()
/**
* Get the date three days ago (same yyyyMMdd format)
*/
var cal3:Calendar=Calendar.getInstance()
cal3.add(Calendar.DATE,-3)
var threeDay=dateFormat.format(cal3.getTime())
// NOTE(review): same argument-less format() call as for nowDay above.
val threeDayAgo : String = threeDay.format()
/**
* Initialize the historical paths
* (path id, path, path step count, frequency, creation date, latest active date, enabled flag, partition date)
*/
var repeatHisPathData = spark.createDataFrame(List(
(1,"a->b->c->d->e->", 5, 182,"20200630","20200716",1,"20200717"),
(2,"h->g->e->", 3, 22,"20200630","20200710",1,"20200717")
)) toDF("flow_id","paths", "step", "freq","create_dt","active_dt","is_enable","dt")
/**
* Initialize the paths newly added today
*/
val repeatNewPathData=new ArrayList[(String,Int,Int)]
repeatNewPathData.add(("a->b->c->d->e->",5,2)) // path, step count, frequency
repeatNewPathData.add(("h->g->e->i->",4,2))
repeatNewPathData.add(("h->g->e->",3,2))
/**
* Step 1: check the newly added paths for duplicates and mark them
* NOTE(review): isInHistPath is not defined in the visible part of the file;
* its return type and marking behaviour need to be confirmed there.
*/
val afterRepeatCom = isInHistPath(repeatNewPathData,repeatHisPathData)
println(afterRepeatCom)
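/*
 * User frequent-path update
 * Latest recommended article published on 2021-06-21 12:04:31.
 * This article explores how to analyze users' frequent paths within an app or
 * website and introduces a method for updating these paths in real time. A deeper
 * understanding of users' interaction behavior makes it possible to optimize the
 * user experience and improve user retention.
 * Summary generated by CSDN using AI technology.
 */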