spark算子练习

一、数据准备

创建 student.txt 文件(每行 6 个字段,以单个空格分隔,依次为:班级 姓名 年龄 性别 科目 成绩)

12 张三 25 男 chinese 50
12 张三 25 男 math 60
12 张三 25 男 english 70
12 李四 20 男 chinese 50
12 李四 20 男 math 50
12 李四 20 男 english 50
12 王芳 19 女 chinese 70
12 王芳 19 女 math 70
12 王芳 19 女 english 70
13 张大三 25 男 chinese 60
13 张大三 25 男 math 60
13 张大三 25 男 english 70
13 李大四 20 男 chinese 50
13 李大四 20 男 math 60
13 李大四 20 男 english 50
13 王小芳 19 女 chinese 70
13 王小芳 19 女 math 80
13 王小芳 19 女 english 70

二、题目及答案

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark RDD operator exercises over a small student score file.
 *
 * Every non-blank input line must hold six single-space-separated fields:
 * class id, name, age, gender, course, score. The program prints the answer
 * to each exercise on standard output.
 *
 * NOTE(review): the Spark documentation recommends defining an explicit
 * `main()` instead of extending `scala.App`; `App` is kept here so the
 * object's existing members and entry point stay unchanged.
 */
object Work02 extends App {
  // Create (or reuse) a local SparkContext running with 6 threads.
  val conf: SparkConf = new SparkConf().setMaster("local[6]").setAppName("sparkTest")
  val sc: SparkContext = SparkContext.getOrCreate(conf)

  // Raw text lines of the student file.
  val file: RDD[String] = sc.textFile("D:\\project\\day0804\\src\\data\\student.txt")

  // Parsed records: (class id, name, age, gender, course, score).
  // Blank lines are skipped so a trailing newline cannot crash `toInt`, each
  // line is split only once, and the result is cached because every exercise
  // below runs its own action over this RDD.
  val student: RDD[(Int, String, Int, String, String, Int)] = file
    .filter(line => line.trim.nonEmpty)
    .map { line =>
      val fields = line.split(" ")
      (fields(0).toInt, fields(1), fields(2).toInt, fields(3), fields(4), fields(5).toInt)
    }
    .cache()

  // Exercise 1: how many people took the exam?
  // Each student appears once per course, so names are de-duplicated first.
  println("一共有" + student.map(_._2).distinct.count + "人参加考试")

  // Exercise 1.1: how many examinees are younger than 20?
  println("一共有" + student.filter(_._3 < 20).map(_._2).distinct.count + "个小于 20 岁的人参加考试")

  // Exercise 1.2: how many examinees are exactly 20?
  println("一共有" + student.filter(_._3 == 20).map(_._2).distinct.count + "个等于 20 岁的人参加考试")

  // Exercise 1.3: how many examinees are older than 20?
  println("一共有" + student.filter(_._3 > 20).map(_._2).distinct.count + "个大于 20 岁的人参加考试")

  // Exercise 2: how many boys took the exam?
  println("一共有" + student.filter(_._4 == "男").map(_._2).distinct.count + "个男生参加考试")

  // Exercise 2.1: how many girls took the exam?
  println("一共有" + student.filter(_._4 == "女").map(_._2).distinct.count + "个女生参加考试")

  // Exercise 3: how many people from class 12 took the exam?
  println("12 班有" + student.filter(_._1 == 12).map(_._2).distinct.count + "人参加考试")

  // Exercise 3.1: how many people from class 13 took the exam?
  println("13 班有" + student.filter(_._1 == 13).map(_._2).distinct.count + "人参加考试")

  // Exercise 4: what is the average Chinese score?
  val sum: Double = student.filter(_._5 == "chinese").map(_._6).reduce(_ + _).toDouble
  val num: Double = student.filter(_._5 == "chinese").count.toDouble
  // Fixed: this used to divide `sum12`, which is still 0.0 at this point in
  // the initialization order, so the printed average was always 0.
  println("语文平均成绩是" + (sum / num).round)

  // Exercise 4.1: what is the average math score?
  val sumMath: Double = student.filter(_._5 == "math").map(_._6).reduce(_ + _).toDouble
  val numMath: Double = student.filter(_._5 == "math").count.toDouble
  println("数学平均成绩是" + (sumMath / numMath).round)

  // Exercise 4.2: what is the average English score?
  val sumEnglish: Double = student.filter(_._5 == "english").map(_._6).reduce(_ + _).toDouble
  val numEnglish: Double = student.filter(_._5 == "english").count.toDouble
  println("英语平均成绩是" + (sumEnglish / numEnglish).round)

  // Exercise 6: what is the average score of class 12?
  val sum12: Double = student.filter(_._1 == 12).map(_._6).sum
  val count12: Long = student.filter(_._1 == 12).count
  println("12班平均成绩" + sum12 / count12)

  // Exercise 6.1: what is the average score of the boys in class 12?
  val sum12man: Double = student.filter(r => r._1 == 12 && r._4 == "男").map(_._6).sum
  val count12man: Long = student.filter(r => r._1 == 12 && r._4 == "男").count
  println("12班男生平均成绩" + sum12man / count12man)

  // Exercise 6.2: what is the average score of the girls in class 12?
  val sum12female: Double = student.filter(r => r._1 == 12 && r._4 == "女").map(_._6).sum
  val count12female: Long = student.filter(r => r._1 == 12 && r._4 == "女").count
  println("12班女生平均成绩" + sum12female / count12female)

  // Exercise 6.3: the same three questions for class 13.

  // Class 13: overall average score.
  val sum13: Double = student.filter(_._1 == 13).map(_._6).sum
  val count13: Long = student.filter(_._1 == 13).count
  println("13班平均成绩" + (sum13 / count13).round)

  // Class 13: average score of the boys.
  val sum13man: Double = student.filter(r => r._1 == 13 && r._4 == "男").map(_._6).sum
  val count13man: Long = student.filter(r => r._1 == 13 && r._4 == "男").count
  println("13班男生平均成绩" + (sum13man / count13man).round)

  // Class 13: average score of the girls.
  val sum13female: Double = student.filter(r => r._1 == 13 && r._4 == "女").map(_._6).sum
  val count13female: Long = student.filter(r => r._1 == 13 && r._4 == "女").count
  println("13班女生平均成绩" + (sum13female / count13female).round)

  // Exercise 7: what is the highest Chinese score in the whole school?
  // Fixed: the filter and the label previously targeted English.
  println("全校语文最高分为:" + student.filter(_._5 == "chinese").map(_._6).max)

  // Exercise 7.1: what is the lowest Chinese score in class 12?
  val minChinese: Int = student.filter(r => r._5 == "chinese" && r._1 == 12).map(_._6).min
  println("12语文成绩最低分为:" + minChinese)

  // Exercise 7.2: what is the highest math score in class 13?
  // Fixed: the filter previously selected class 12.
  println("13 班数学最高成绩是:" + student.filter(r => r._1 == 13 && r._5 == "math").map(_._6).max)

  // Exercise 8: how many girls in class 12 have a total score above 150?
  // Collect each girl's scores grouped by name, then total them on the driver.
  private val tmp: Array[Iterable[Int]] = student
    .filter(r => r._1 == 12 && r._4 == "女")
    .groupBy(_._2)
    .map { case (_, records) => records.map(_._6) }
    .collect()
  // Fixed: the predicate used to be `x => true`, which counted every girl
  // regardless of her total.
  println("成绩大于 150 分的 12 班的女生有" + tmp.map(_.sum).count(_ > 150) + "个")

  // Release the Spark resources once all exercises have been printed.
  sc.stop()
}
©️2020 CSDN 皮肤主题: 技术工厂 设计师:CSDN官方博客 返回首页