简易电影TOP10分析系统

package com.bj.scalacode

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

/**
 * Ranks movies by average rating using Spark RDDs over `::`-delimited text files.
 *
 * Inputs (only the leading fields of each record are read):
 *  - `./data/movies.dat`  : MovieID, MovieName
 *  - `./data/users.dat`   : UserID, Gender
 *  - `./data/ratings.dat` : UserID, MovieID, Rating
 *
 * Output (printed to stdout):
 *  - the 12 movies with the highest average rating over all users,
 *  - the 10 movies with the highest average rating among users whose gender field is "M",
 *  - the 10 movies with the highest average rating among users whose gender field is "F".
 *
 * NOTE(review): "most popular" is defined purely as the arithmetic mean of ratings, so a
 * movie with a single top rating outranks a widely rated one. Confirm whether a minimum
 * rating count is wanted.
 */
object RDD_Movie_Users_Analyzer6 {

  /** Field separator used by all three input files. */
  private val Delimiter = "::"

  /**
   * Splits a record and returns its fields only when at least `required` are present,
   * so blank or truncated lines are skipped instead of failing the task with an
   * ArrayIndexOutOfBoundsException.
   */
  private def fields(line: String, required: Int): Option[Array[String]] = {
    val parts = line.split(Delimiter)
    if (parts.length >= required) Some(parts) else None
  }

  /**
   * Parses a ratings record into (userId, movieId, score).
   * Returns None for short lines or when the rating field is not a number.
   */
  private def parseRating(line: String): Option[(String, String, Double)] =
    fields(line, 3).flatMap { f =>
      scala.util.Try(f(2).toDouble).toOption.map(score => (f(0), f(1), score))
    }

  /**
   * Computes the average score per movie, attaches the movie title, and prints the `n`
   * highest averages in descending order as `title + separator + average`.
   *
   * @param scores    (movieId, score) pairs, one per rating
   * @param titles    (movieId, movieName) pairs
   * @param n         number of rows to print
   * @param separator text placed between the title and the average in each printed row
   */
  private def printTopByAverage(
      scores: org.apache.spark.rdd.RDD[(String, Double)],
      titles: org.apache.spark.rdd.RDD[(String, String)],
      n: Int,
      separator: String): Unit = {
    scores
      .mapValues(score => (score, 1))
      // Accumulate (sum of scores, number of ratings) per movie.
      .reduceByKey { case ((sumA, countA), (sumB, countB)) => (sumA + sumB, countA + countB) }
      .mapValues { case (sum, count) => sum / count }
      .join(titles)
      // Re-key by average so the result can be sorted on it.
      .map { case (_, (average, title)) => (average, title) }
      .sortByKey(ascending = false)
      .take(n)
      .foreach { case (average, title) => println(title + separator + average) }
  }

  /**
   * Entry point: builds a local SparkSession, runs the three rankings, and always
   * stops the session afterwards.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    // 1. Configure Spark to run locally on all available cores.
    val conf = new SparkConf().setMaster("local[*]").setAppName("RDD_Movie_Users_Analyzer6")
    // 2. Create the SparkSession.
    val spark = SparkSession.builder().config(conf).getOrCreate()
    try {
      // 3. Obtain the SparkContext and reduce log noise.
      val sc = spark.sparkContext
      sc.setLogLevel("warn")

      // 4. Load the raw text files.
      val movieRDD = sc.textFile("./data/movies.dat")
      val userRDD = sc.textFile("./data/users.dat")
      val ratingRDD = sc.textFile("./data/ratings.dat")

      // 5. (movieId, movieName); cached because it is joined in all three rankings.
      val movieinfo = movieRDD.flatMap(fields(_, 2)).map(f => (f(0), f(1))).cache()
      // (userId, movieId, score); cached because it feeds both the overall and per-gender rankings.
      val rating = ratingRDD.flatMap(parseRating).cache()

      // 6-8. Overall ranking by average rating.
      println("打印出最受欢迎的前12部电影和平均评分:")
      printTopByAverage(rating.map { case (_, movieId, score) => (movieId, score) }, movieinfo, 12, " 的平均评分:")

      // 9. Per-gender rankings: join ratings with users on userId, keeping (gender, (movieId, score)).
      val userGender = userRDD.flatMap(fields(_, 2)).map(f => (f(0), f(1)))
      val genderRating = rating
        .map { case (userId, movieId, score) => (userId, (movieId, score)) }
        .join(userGender)
        .map { case (_, (movieScore, gender)) => (gender, movieScore) }
        .cache() // reused by both the male and the female ranking

      val maleFilterRatings = genderRating.filter(_._1.equals("M")).values
      val femaleFilterRatings = genderRating.filter(_._1.equals("F")).values

      println("所有电影中最受男生喜欢的电影TOP10:")
      printTopByAverage(maleFilterRatings, movieinfo, 10, "平均评分:")
      println("所有电影中最受女生喜欢的电影TOP10:")
      printTopByAverage(femaleFilterRatings, movieinfo, 10, "平均评分:")
    } finally {
      // Stopping the session also stops its SparkContext, even when a job above failed.
      spark.stop()
    }
  }
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值