电商分析平台----需求三:热门Top10品类

做什么?

在符合条件的用户行为数据中,获取点击、下单和支付数量排名前10的品类。在Top10的排序中,按照点击数量、下单数量、支付数量的次序进行排序,即优先考虑点击数量。

需求分析

在这里插入图片描述
首先,我们想要在符合条件的action中统计出排名前十的热门品类,并且排名依据是按点击数量、下单数量、支付数量的次序依次比较。因此可以通过逆推得到所需的中间数据:
Top10品类 --> (id, "clickCount=83|orderCount=67|payCount=63") --> 分别统计 (id, clickCount)、(id, orderCount)、(id, payCount) --> 需要先得到符合条件的原始数据

步骤分析

  1. 得到符合需求一中过滤条件的原始数据—join算子
    // Fetch the action records for this task; what getOriActionRDD selects is
    // defined in serverOne and is not visible from this snippet.
    val actionRdd = serverOne.getOriActionRDD(session, task)
    // Key every action by its session id so it can be joined per session.
    val sessionId2ActionRDD = actionRdd.map(item => item.session_id -> item)
    // Inner join with FilterInfo: only session ids present on both sides remain.
    // The FilterInfo value is discarded and just (sessionId, action) is kept.
    val sessionId2FilterActionRDD = sessionId2ActionRDD
      .join(FilterInfo)
      .map { case (sessionId, (action, _)) => (sessionId, action) }
  2. 获取所有发生过点击、下单、支付行为的categoryId
 // Distinct (cid, cid) pairs for every category id that appears in a click, an order
 // or a payment. The three fields are inspected independently (not as an else-if
 // chain) so that a record carrying more than one kind of action contributes all of
 // its categories; the count helpers also treat the three fields independently, and
 // a category missing here would be dropped by the later left outer joins.
 val cid2CidRdd = sessionId2FilterActionRDD.flatMap {
      case (_, action: UserVisitAction) =>
        // click_category_id == -1 is treated as "no click on this record".
        val clicked =
          if (action.click_category_id != -1L) Seq[Long](action.click_category_id)
          else Seq.empty[Long]
        // A null id list means the record has no order / payment; otherwise the
        // field is a comma-separated list of category ids.
        val ordered = Option(action.order_category_ids).toSeq.flatMap(_.split(",").toSeq).map(_.toLong)
        val paid = Option(action.pay_category_ids).toSeq.flatMap(_.split(",").toSeq).map(_.toLong)

        (clicked ++ ordered ++ paid).map(cid => (cid, cid))
    }.distinct()
  3. 分别统计点击、下单、支付行为的数量:
 // Step 2: per-category totals for click, order and pay actions, each computed
 // from the same filtered (sessionId, action) RDD.
    val cid2ClickCountRDD = getClickCount(sessionId2FilterActionRDD)
    val cid2OrderCountRDD = getOrderCount(sessionId2FilterActionRDD)
    val cid2PayCountRDD = getPayCount(sessionId2FilterActionRDD)
/**
  * Counts click actions per category.
  *
  * @param sessionId2FilterActionRDD (sessionId, action) pairs to count over
  * @return (click_category_id, number of actions carrying that id)
  */
def getClickCount(sessionId2FilterActionRDD: RDD[(String, UserVisitAction)])={
  val onePerClick = sessionId2FilterActionRDD
    .map(_._2)
    // Records whose click_category_id is -1 are skipped (treated as "not a click").
    .filter(_.click_category_id != -1L)
    .map(action => (action.click_category_id, 1L))

  onePerClick.reduceByKey((a, b) => a + b)
}
  /**
    * Counts order actions per category.
    *
    * order_category_ids is a comma-separated list of category ids; every id in the
    * list contributes one order to its category. Records whose list is null are skipped.
    *
    * @param sessionId2FilterActionRDD (sessionId, action) pairs to count over
    * @return (categoryId, number of times the id appears in an order list)
    */
  def getOrderCount(sessionId2FilterActionRDD: RDD[(String, UserVisitAction)])={
    sessionId2FilterActionRDD
      .filter { case (_, action) => action.order_category_ids != null }
      .flatMap { case (_, action) =>
        // Split once per record (the former empty loop split the string a second
        // time and threw the result away).
        action.order_category_ids.split(",").map(id => (id.toLong, 1L))
      }
      .reduceByKey(_ + _)
  }
  /**
    * Counts pay actions per category.
    *
    * @param sessionId2FilterActionRDD (sessionId, action) pairs to count over
    * @return (categoryId, number of times the id appears in a pay_category_ids list)
    */
  def getPayCount(sessionId2FilterActionRDD: RDD[(String, UserVisitAction)]) = {
    // Only records with a non-null pay_category_ids take part.
    val paidActions = sessionId2FilterActionRDD.filter { case (_, action) =>
      action.pay_category_ids != null
    }
    // The field is a comma-separated id list: emit (id, 1) for each entry.
    val onePerPaidCategory = paidActions.flatMap { pair =>
      pair._2.pay_category_ids.split(",").map(cid => cid.toLong -> 1L)
    }

    onePerPaidCategory.reduceByKey((a, b) => a + b)
  }
  4. 用左外连接(leftOuterJoin)算子依次合并三种计数,最终格式为(categoryId,str),其中str由品类id和三种计数拼接而成,计数部分形如:clickCount=83|orderCount=67|payCount=63
/**
  * Merges the click, order and pay counts of every category into one info string.
  *
  * Starting from the distinct category ids, each count RDD is attached with a left
  * outer join, so a category that is missing from a count RDD is still kept and that
  * count is reported as 0.
  *
  * @param cid2CidRDD        distinct (categoryId, categoryId) pairs
  * @param cid2ClickCountRDD (categoryId, click count)
  * @param cid2OrderCountRDD (categoryId, order count)
  * @param cid2PayCountRDD   (categoryId, pay count)
  * @return (categoryId, info) where info is the "|"-separated concatenation of
  *         FIELD_CATEGORY_ID=id, FIELD_CLICK_COUNT=n, FIELD_ORDER_COUNT=n and
  *         FIELD_PAY_COUNT=n, in that order
  */
def getFullCount(cid2CidRDD: RDD[(Long, Long)], cid2ClickCountRDD: RDD[(Long, Long)], cid2OrderCountRDD: RDD[(Long, Long)], cid2PayCountRDD: RDD[(Long, Long)]) = {
  val cid2ClickInfoRDD = cid2CidRDD.leftOuterJoin(cid2ClickCountRDD).map {
    case (cid, (_, clickCountOpt)) =>
      // No right-side match means the category was never clicked: report 0.
      // (An `if` without `else` here would render the missing count as "()",
      // which cannot be parsed back with toLong when the sort key is built.)
      val clickCount = clickCountOpt.getOrElse(0L)
      val aggrInfo = Constants.FIELD_CATEGORY_ID + "=" + cid + "|" +
        Constants.FIELD_CLICK_COUNT + "=" + clickCount
      (cid, aggrInfo)
  }

  val cid2OrderInfoRDD = cid2ClickInfoRDD.leftOuterJoin(cid2OrderCountRDD).map {
    case (cid, (clickInfo, orderCountOpt)) =>
      val orderCount = orderCountOpt.getOrElse(0L)
      val aggrInfo = clickInfo + "|" +
        Constants.FIELD_ORDER_COUNT + "=" + orderCount
      (cid, aggrInfo)
  }

  val cid2PayInfoRDD = cid2OrderInfoRDD.leftOuterJoin(cid2PayCountRDD).map {
    case (cid, (orderInfo, payCountOpt)) =>
      val payCount = payCountOpt.getOrElse(0L)
      val aggrInfo = orderInfo + "|" +
        Constants.FIELD_PAY_COUNT + "=" + payCount
      (cid, aggrInfo)
  }

  cid2PayInfoRDD
}
  5. 自定义排序器,将数据转化为(sortKey,info)格式,接着用sortByKey进行降序排序
    自定义排序:
package server

/**
  * Sort key for a category: compares by click count first, then order count,
  * then pay count.
  *
  * @param clickCount number of clicks
  * @param orderCount number of orders
  * @param payCount   number of payments
  */
case class SortKey(clickCount: Long, orderCount: Long, payCount: Long) extends Ordered[SortKey] {

  /**
    * @return a negative value if this < that, 0 if equal, a positive value if this > that
    */
  override def compare(that: SortKey): Int = {
    // Compare the Longs directly instead of truncating their difference to Int:
    // a difference outside the Int range would flip sign or collapse to 0.
    val byClick = java.lang.Long.compare(this.clickCount, that.clickCount)
    if (byClick != 0) byClick
    else {
      val byOrder = java.lang.Long.compare(this.orderCount, that.orderCount)
      if (byOrder != 0) byOrder
      else java.lang.Long.compare(this.payCount, that.payCount)
    }
  }
}

// Step 4: re-key each category's info string by a SortKey built from the three
// counts that are read back out of that string.
val sortRDD = cid2FullCountRDD.map { pair =>
  val info = pair._2
  val sortKey = SortKey(
    StringUtil.getFieldFromConcatString(info, "\\|", Constants.FIELD_CLICK_COUNT).toLong,
    StringUtil.getFieldFromConcatString(info, "\\|", Constants.FIELD_ORDER_COUNT).toLong,
    StringUtil.getFieldFromConcatString(info, "\\|", Constants.FIELD_PAY_COUNT).toLong
  )
  sortKey -> info
}
// Step 5: sort descending by SortKey and pull the first ten entries to the driver.
val top10 = sortRDD.sortByKey(ascending = false).take(10)
  6. 数据封装,写入数据库
// Step 6: turn the driver-side top-10 array back into an RDD of Top10Category
// rows, ready to be written to the database.
val top10CategoryRDD = sparkSession.sparkContext.makeRDD(top10).map { entry =>
  val (sortKey, countInfo) = entry
  // The category id is read back out of the concatenated info string.
  val cid = StringUtil.getFieldFromConcatString(countInfo, "\\|", Constants.FIELD_CATEGORY_ID).toLong
  Top10Category(taskUUID, cid, sortKey.clickCount, sortKey.orderCount, sortKey.payCount)
}

// Persist to the database (left disabled, as in the original)
/* import sparkSession.implicits._
 top10CategoryRDD.toDF().write
   .format("jdbc")
   .option("url", ConfigurationManager.config.getString(Constants.JDBC_URL))
   .option("user", ConfigurationManager.config.getString(Constants.JDBC_USER))
   .option("password", ConfigurationManager.config.getString(Constants.JDBC_PASSWORD))
   .option("dbtable", "top10_category_0308")
   .mode(SaveMode.Append)
   .save*/

完整代码:

package server

import commons.constant.Constants
import commons.model.{Top10Category, UserVisitAction}
import commons.utils.StringUtil
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

import scala.collection.mutable.ArrayBuffer

/**
  * Requirement 3: the ten most popular categories among the filtered user actions,
  * ranked by click count, then order count, then pay count.
  */
class serverThree  extends Serializable {

  /**
    * Computes the top-10 categories, prints them, and returns them.
    *
    * @param sparkSession              session used to turn the result back into an RDD
    * @param taskUUID                  task identifier stored in each Top10Category row
    * @param sessionId2FilterActionRDD (sessionId, action) pairs to analyse
    * @return at most ten (SortKey, info) pairs in descending SortKey order, where info
    *         is the concatenated string produced by getFullCount
    */
  def top10PopularCategories(sparkSession: SparkSession,
                             taskUUID: String,
                             sessionId2FilterActionRDD: RDD[(String, UserVisitAction)])={
    // 1. Distinct (cid, cid) pairs for every category id that appears in a click, an
    //    order or a payment. The three fields are inspected independently (not as an
    //    else-if chain) so that a record carrying more than one kind of action
    //    contributes all of its categories; a category missing here would be dropped
    //    by the left outer joins in getFullCount.
    val cid2CidRdd = sessionId2FilterActionRDD.flatMap {
      case (_, action: UserVisitAction) =>
        // click_category_id == -1 is treated as "no click on this record".
        val clicked =
          if (action.click_category_id != -1L) Seq[Long](action.click_category_id)
          else Seq.empty[Long]
        // A null id list means no order / payment; otherwise it is comma-separated.
        val ordered = Option(action.order_category_ids).toSeq.flatMap(_.split(",").toSeq).map(_.toLong)
        val paid = Option(action.pay_category_ids).toSeq.flatMap(_.split(",").toSeq).map(_.toLong)

        (clicked ++ ordered ++ paid).map(cid => (cid, cid))
    }.distinct()

    // 2. Per-category click, order and pay counts.
    val cid2ClickCountRDD = getClickCount(sessionId2FilterActionRDD)

    val cid2OrderCountRDD = getOrderCount(sessionId2FilterActionRDD)

    val cid2PayCountRDD = getPayCount(sessionId2FilterActionRDD)

    // 3. Left-outer-join the counts onto the category ids one after another,
    //    producing (cid, info) where info concatenates the id and the three counts.
    val cid2FullCountRDD = getFullCount(cid2CidRdd, cid2ClickCountRDD, cid2OrderCountRDD, cid2PayCountRDD)

    // 4. Re-key every entry by a SortKey parsed back out of its info string.
    val sortRDD = cid2FullCountRDD.map {
      case (_, info) =>
        val clickCount = StringUtil.getFieldFromConcatString(info, "\\|", Constants.FIELD_CLICK_COUNT).toLong
        val orderCount = StringUtil.getFieldFromConcatString(info, "\\|", Constants.FIELD_ORDER_COUNT).toLong
        val payCount = StringUtil.getFieldFromConcatString(info, "\\|", Constants.FIELD_PAY_COUNT).toLong

        (SortKey(clickCount, orderCount, payCount), info)
    }

    // 5. Sort descending and bring the first ten entries to the driver.
    val top10 = sortRDD.sortByKey(ascending = false).take(10)
    top10.foreach(println)

    // 6. Wrap the result as Top10Category rows for the database. This RDD is only
    //    consumed by the disabled JDBC write below, so as long as that stays
    //    commented out the transformation is never executed.
    val top10CategoryRDD = sparkSession.sparkContext.makeRDD(top10).map {
      case (sortKey, countInfo) =>
        val cid = StringUtil.getFieldFromConcatString(countInfo, "\\|", Constants.FIELD_CATEGORY_ID).toLong
        Top10Category(taskUUID, cid, sortKey.clickCount, sortKey.orderCount, sortKey.payCount)
    }

    // Persist to the database (left disabled, as in the original)
    /* import sparkSession.implicits._
     top10CategoryRDD.toDF().write
       .format("jdbc")
       .option("url", ConfigurationManager.config.getString(Constants.JDBC_URL))
       .option("user", ConfigurationManager.config.getString(Constants.JDBC_USER))
       .option("password", ConfigurationManager.config.getString(Constants.JDBC_PASSWORD))
       .option("dbtable", "top10_category_0308")
       .mode(SaveMode.Append)
       .save*/
    top10

  }

  /**
    * Merges the click, order and pay counts of every category into one info string.
    *
    * Each count RDD is attached with a left outer join, so a category that is missing
    * from a count RDD is still kept and that count is reported as 0.
    *
    * @param cid2CidRDD        distinct (categoryId, categoryId) pairs
    * @param cid2ClickCountRDD (categoryId, click count)
    * @param cid2OrderCountRDD (categoryId, order count)
    * @param cid2PayCountRDD   (categoryId, pay count)
    * @return (categoryId, info) where info is the "|"-separated concatenation of
    *         FIELD_CATEGORY_ID=id, FIELD_CLICK_COUNT=n, FIELD_ORDER_COUNT=n and
    *         FIELD_PAY_COUNT=n, in that order
    */
  def getFullCount(cid2CidRDD: RDD[(Long, Long)], cid2ClickCountRDD: RDD[(Long, Long)], cid2OrderCountRDD: RDD[(Long, Long)], cid2PayCountRDD: RDD[(Long, Long)]) = {
    val cid2ClickInfoRDD = cid2CidRDD.leftOuterJoin(cid2ClickCountRDD).map {
      case (cid, (_, clickCountOpt)) =>
        // No right-side match means the category was never clicked: report 0.
        // (An `if` without `else` here would render the missing count as "()",
        // which the toLong in top10PopularCategories cannot parse.)
        val clickCount = clickCountOpt.getOrElse(0L)
        val aggrInfo = Constants.FIELD_CATEGORY_ID + "=" + cid + "|" +
          Constants.FIELD_CLICK_COUNT + "=" + clickCount
        (cid, aggrInfo)
    }

    val cid2OrderInfoRDD = cid2ClickInfoRDD.leftOuterJoin(cid2OrderCountRDD).map {
      case (cid, (clickInfo, orderCountOpt)) =>
        val orderCount = orderCountOpt.getOrElse(0L)
        val aggrInfo = clickInfo + "|" +
          Constants.FIELD_ORDER_COUNT + "=" + orderCount
        (cid, aggrInfo)
    }

    val cid2PayInfoRDD = cid2OrderInfoRDD.leftOuterJoin(cid2PayCountRDD).map {
      case (cid, (orderInfo, payCountOpt)) =>
        val payCount = payCountOpt.getOrElse(0L)
        val aggrInfo = orderInfo + "|" +
          Constants.FIELD_PAY_COUNT + "=" + payCount
        (cid, aggrInfo)
    }

    cid2PayInfoRDD
  }

  /**
    * Counts click actions per category; records whose click_category_id is -1 are skipped.
    *
    * @param sessionId2FilterActionRDD (sessionId, action) pairs to count over
    * @return (click_category_id, number of actions carrying that id)
    */
  def getClickCount(sessionId2FilterActionRDD: RDD[(String, UserVisitAction)])={
    val clickFilterRDD = sessionId2FilterActionRDD.filter {
      case (_, action: UserVisitAction) => action.click_category_id != -1L
    }
    val clickNumRDD = clickFilterRDD.map {
      case (_, action) => (action.click_category_id, 1L)
    }

    clickNumRDD.reduceByKey(_ + _)
  }

  /**
    * Counts order actions per category. order_category_ids is a comma-separated id
    * list; every id in it counts once. Records whose list is null are skipped.
    *
    * @param sessionId2FilterActionRDD (sessionId, action) pairs to count over
    * @return (categoryId, number of times the id appears in an order list)
    */
  def getOrderCount(sessionId2FilterActionRDD: RDD[(String, UserVisitAction)])={
    val orderFilterRDD = sessionId2FilterActionRDD.filter(item => item._2.order_category_ids != null)
    val orderNumRDD = orderFilterRDD.flatMap {
      case (_, action) =>
        action.order_category_ids.split(",").map(item => (item.toLong, 1L))
    }

    orderNumRDD.reduceByKey(_ + _)
  }

  /**
    * Counts pay actions per category. pay_category_ids is a comma-separated id list;
    * every id in it counts once. Records whose list is null are skipped.
    *
    * @param sessionId2FilterActionRDD (sessionId, action) pairs to count over
    * @return (categoryId, number of times the id appears in a pay list)
    */
  def getPayCount(sessionId2FilterActionRDD: RDD[(String, UserVisitAction)]) = {
    val payFilterRDD = sessionId2FilterActionRDD.filter(item => item._2.pay_category_ids != null)

    val payNumRDD = payFilterRDD.flatMap {
      case (_, action) =>
        action.pay_category_ids.split(",").map(item => (item.toLong, 1L))
    }

    payNumRDD.reduceByKey(_ + _)
  }

}

排序器:

package server

/**
  * Sort key for a category: compares by click count first, then order count,
  * then pay count.
  *
  * @param clickCount number of clicks
  * @param orderCount number of orders
  * @param payCount   number of payments
  */
case class SortKey(clickCount: Long, orderCount: Long, payCount: Long) extends Ordered[SortKey] {

  /**
    * @return a negative value if this < that, 0 if equal, a positive value if this > that
    */
  override def compare(that: SortKey): Int = {
    // Compare the Longs directly instead of truncating their difference to Int:
    // a difference outside the Int range would flip sign or collapse to 0.
    val byClick = java.lang.Long.compare(this.clickCount, that.clickCount)
    if (byClick != 0) byClick
    else {
      val byOrder = java.lang.Long.compare(this.orderCount, that.orderCount)
      if (byOrder != 0) byOrder
      else java.lang.Long.compare(this.payCount, that.payCount)
    }
  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值