【大数据spark SQL项目实战】日志分析(六):按地市统计主站最受欢迎的Top N的课程并写入mysql

35 篇文章 1 订阅
19 篇文章 0 订阅

将清洗的数据存储到目标地址

1.按地市统计最受欢迎的TOP3的课程
package com.kinglone.log

import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions._

import scala.collection.mutable.ListBuffer

/**
  * TopN统计Spark作业
  */
object TopNStatJob {


  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("TopNStatJob")
      .config("spark.sql.sources.partitionColumnTypeInference.enabled","false") //分区字段的数据类型调整【禁用】
      .master("local[2]").getOrCreate()

    val accessDF = spark.read.format("parquet").load("file:///D:/test/clean")

   // accessDF.printSchema()
   // accessDF.show(false)
   val day = "20170511"
   
    //按地市统计imcco主站最受欢迎的Top N的课程
    cityAccessTopNStat(spark, accessDF, day)

    spark.close()
  }

  /**
    * 按地市统计imcco主站最受欢迎的Top N的课程
    * @param spark
    * @param accessDF
    * @param day
    */
  def cityAccessTopNStat(spark: SparkSession, accessDF: DataFrame, day: String) ={
      import spark.implicits._
      val cityVideoAccessTopNDF = accessDF.filter($"day" === day && $"cmsType" === "video")
        .groupBy("day","city", "cmsId").agg(count("cmsId").as("times"))
      cityVideoAccessTopNDF.show(false)

      //Window函数在Spark SQL的使用
      val top3DF = cityVideoAccessTopNDF.select(
        cityVideoAccessTopNDF("day"),
        cityVideoAccessTopNDF("city"),
        cityVideoAccessTopNDF("cmsId"),
        cityVideoAccessTopNDF("times"),
        row_number().over(Window.partitionBy(cityVideoAccessTopNDF("city")).orderBy(cityVideoAccessTopNDF("times").desc))
          .as("times_rank")
      ).filter("times_rank <=3")

      top3DF.show(false)

    /**
      * 将统计结果写入到MySQL中
      */
    try {
      top3DF.foreachPartition(partitionOfRecords => {
        val list = new ListBuffer[DayCityVideoAccessStat]

        partitionOfRecords.foreach(info => {
          val day = info.getAs[String]("day")
          val cmsId = info.getAs[Long]("cmsId")
          val city = info.getAs[String]("city")
          val times = info.getAs[Long]("times")
          val timesRank = info.getAs[Int]("times_rank")

          list.append(DayCityVideoAccessStat(day, cmsId,city, times,timesRank))
        })

        StatDAO.insertCityDayVideoAccessTopN(list)
      })
    } catch {
      case e:Exception => e.printStackTrace()
    }
  }
}

DayCityVideoAccessSta:实体类

case class DayCityVideoAccessStat(day:String, cmsId:Long, city:String,times:Long,timesRank:Int)

StatDAO :

package com.kinglone.log

import java.sql.{PreparedStatement, Connection}

import scala.collection.mutable.ListBuffer

/**
 * 各个维度统计的DAO操作
 */
object StatDAO {



  /**
    * 批量保存DayCityVideoAccessStat到数据库
    */
  def insertCityDayVideoAccessTopN(list: ListBuffer[DayCityVideoAccessStat]): Unit = {

    var connection: Connection = null
    var pstmt: PreparedStatement = null

    try {
      connection = MySQLUtils.getConnection()

      connection.setAutoCommit(false) //设置手动提交

      val sql = "insert into day_video_city_access_topn_stat(day,cms_id,city,times,times_rank ) values (?,?,?,?,?) "
      pstmt = connection.prepareStatement(sql)

      for (ele <- list) {
        pstmt.setString(1, ele.day)
        pstmt.setLong(2, ele.cmsId)
        pstmt.setString(3, ele.city)
        pstmt.setLong(4, ele.times)
        pstmt.setInt(5, ele.timesRank)

        pstmt.addBatch()
      }

      pstmt.executeBatch() // 执行批量处理
      connection.commit() //手工提交
    } catch {
      case e: Exception => e.printStackTrace()
    } finally {
      MySQLUtils.release(connection, pstmt)
    }
  }
}

效果:
在这里插入图片描述
select * from day_video_city_access_topn_stat t ORDER BY t.city desc ,t.times_rank asc
在这里插入图片描述

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值