/**
 * Entry point of the TopN statistics job.
 *
 * Loads the cleaned access-log Parquet files, exposes them as the `tb_course` temporary view
 * and computes the most visited video courses for a given day.
 */
object TopNJob {

  /**
   * Builds a local SparkSession, loads the cleaned log data, registers it as `tb_course`
   * and runs the TopN computation for the hard-coded day `20180110`.
   *
   * @param args command-line arguments (not read by this job)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("TopNJob").master("local[2]").getOrCreate()
    try {
      val df = spark.read.format("parquet").load("file:///D:\\test_data\\log\\cleanLog\\part-*")
      df.createOrReplaceTempView("tb_course")
      // Most popular TopN courses.
      mostPopular(spark, df, "20180110")
    } finally {
      // Release the session even when loading the data or running the query fails.
      spark.stop()
    }
  }

  /**
   * Computes the ten most visited `video` entries of one day and hands them to
   * `TopNDao.insertDayVideoAccessTopN` (per the original comment, this stores them in the
   * database).
   *
   * The query runs against the `tb_course` temporary view, so that view must already be
   * registered on `spark`. According to the original note, its rows look like
   * `Row(ip, url, cmsType, cmsId, traffic, city, time, day)`.
   *
   * @param spark session on which the `tb_course` view is registered
   * @param df    the loaded log data; kept for interface compatibility, the query itself
   *              reads the `tb_course` view
   * @param day   day to report on, compared for equality with the string column `day`
   */
  def mostPopular(spark: SparkSession, df: DataFrame, day: String): Unit = {
    val courses = spark.table("tb_course")

    // `day` is bound as a string literal through the Column API instead of being spliced
    // into SQL text: a value such as "2018-01-10" is compared as a string rather than being
    // parsed as an arithmetic expression, and it cannot alter the query structure.
    val visits = courses
      .filter(courses("day") === day && courses("cmsType") === "video")
      .groupBy("day", "cmsId")
      .count()
      .withColumnRenamed("count", "times")
    val result = visits.orderBy(visits("times").desc).limit(10)

    // Hand the result over for storage, one partition at a time.
    // The explicit parameter type keeps the call unambiguous between the Scala-function and
    // Java-interface overloads of foreachPartition.
    result.foreachPartition((rows: Iterator[org.apache.spark.sql.Row]) => {
      val beans = new ListBuffer[DayVideoVisitBean]
      beans ++= rows.map { row =>
        DayVideoVisitBean(
          row.getAs[String]("day"),
          row.getAs[Long]("cmsId"),
          row.getAs[Long]("times")
        )
      }
      TopNDao.insertDayVideoAccessTopN(beans)
    })
  }
}
/**
 * Entity holding one row of the per-day video visit statistics.
 *
 * @param day   value of the `day` column the row belongs to
 * @param cmsId value of the `cmsId` column identifying the visited entry
 * @param times number of visits counted for that entry on that day
 */
case class DayVideoVisitBean(
  day: String,
  cmsId: Long,
  times: Long
)