一、数据准备
创建 student.txt 文件(每行字段以单个空格分隔,依次为:班级 姓名 年龄 性别 科目 成绩)
12 张三 25 男 chinese 50
12 张三 25 男 math 60
12 张三 25 男 english 70
12 李四 20 男 chinese 50
12 李四 20 男 math 50
12 李四 20 男 english 50
12 王芳 19 女 chinese 70
12 王芳 19 女 math 70
12 王芳 19 女 english 70
13 张大三 25 男 chinese 60
13 张大三 25 男 math 60
13 张大三 25 男 english 70
13 李大四 20 男 chinese 50
13 李大四 20 男 math 60
13 李大四 20 男 english 50
13 王小芳 19 女 chinese 70
13 王小芳 19 女 math 80
13 王小芳 19 女 english 70
二、题目及答案
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Exam-statistics exercise over `student.txt` using Spark RDDs.
 *
 * Each input line holds six whitespace-separated fields:
 * class id, name, age, gender, course, score. The program prints the answer
 * to each numbered question (head counts, averages, extremes) to stdout.
 */
object Work02 extends App {

  /** One parsed input line: (class id, name, age, gender, course, score). */
  type Record = (Int, String, Int, String, String, Int)

  /**
   * Number of distinct students in `rows`.
   *
   * A student is identified by (class id, name) so that two students who
   * share a name but sit in different classes are not merged into one.
   */
  private def headCount(rows: RDD[Record]): Long =
    rows.map(r => (r._1, r._2)).distinct.count

  /** Mean of the score column of `rows`; NaN when `rows` is empty. */
  private def averageScore(rows: RDD[Record]): Double =
    rows.map(_._6).sum / rows.count

  // Create the Spark context.
  val conf: SparkConf = new SparkConf().setMaster("local[6]").setAppName("sparkTest")
  val sc: SparkContext = SparkContext.getOrCreate(conf)

  // Raw lines of the input file.
  val file: RDD[String] = sc.textFile("D:\\project\\day0804\\src\\data\\student.txt")

  // Parsed records. Blank lines are skipped so a trailing newline does not
  // crash `toInt`; each line is split only once. The RDD is cached because
  // every question below triggers at least one separate action on it.
  val student: RDD[(Int,String,Int,String,String,Int)] = file
    .map(_.trim)
    .filter(_.nonEmpty)
    .map { line =>
      val fields = line.split("\\s+")
      (fields(0).toInt, fields(1), fields(2).toInt, fields(3), fields(4), fields(5).toInt)
    }
    .cache()

  // Frequently reused subsets. The predicates use literals only, so the
  // closures never capture members of this App object.
  private val chinese: RDD[Record] = student.filter(_._5 == "chinese")
  private val math: RDD[Record] = student.filter(_._5 == "math")
  private val english: RDD[Record] = student.filter(_._5 == "english")
  private val class12: RDD[Record] = student.filter(_._1 == 12)
  private val class13: RDD[Record] = student.filter(_._1 == 13)

  // 1. How many students took the exam?
  println(s"一共有${headCount(student)}人参加考试")
  // 1.1 How many students younger than 20 took the exam?
  println(s"一共有${headCount(student.filter(_._3 < 20))}个小于 20 岁的人参加考试")
  // 1.2 How many students aged exactly 20 took the exam?
  println(s"一共有${headCount(student.filter(_._3 == 20))}个等于 20 岁的人参加考试")
  // 1.3 How many students older than 20 took the exam?
  println(s"一共有${headCount(student.filter(_._3 > 20))}个大于 20 岁的人参加考试")

  // 2. How many boys took the exam?
  println(s"一共有${headCount(student.filter(_._4 == "男"))}个男生参加考试")
  // 2.1 How many girls took the exam?
  println(s"一共有${headCount(student.filter(_._4 == "女"))}个女生参加考试")

  // 3. How many students of class 12 took the exam?
  println(s"12 班有${headCount(class12)}人参加考试")
  // 3.1 How many students of class 13 took the exam?
  println(s"13 班有${headCount(class13)}人参加考试")

  // 4. Average Chinese score (rounded). The average is computed from the
  // Chinese scores themselves, not from the class-12 total.
  println(s"语文平均成绩是${averageScore(chinese).round}")
  // 4.1 Average maths score (rounded).
  println(s"数学平均成绩是${averageScore(math).round}")
  // 4.2 Average English score (rounded).
  println(s"英语平均成绩是${averageScore(english).round}")

  // 6. Average score of class 12 (printed unrounded, as this section always did).
  println(s"12班平均成绩${averageScore(class12)}")
  // 6.1 Average score of the boys in class 12.
  println(s"12班男生平均成绩${averageScore(class12.filter(_._4 == "男"))}")
  // 6.2 Average score of the girls in class 12.
  println(s"12班女生平均成绩${averageScore(class12.filter(_._4 == "女"))}")

  // 6.3 Same figures for class 13 (rounded).
  println(s"13班平均成绩${averageScore(class13).round}")
  println(s"13班男生平均成绩${averageScore(class13.filter(_._4 == "男")).round}")
  println(s"13班女生平均成绩${averageScore(class13.filter(_._4 == "女")).round}")

  // 7. Highest Chinese score in the whole school.
  println("全校语文最高分为:" + chinese.map(_._6).max)
  // 7.1 Lowest Chinese score in class 12.
  val minChinese: Int = chinese.filter(_._1 == 12).map(_._6).min
  println("12语文成绩最低分为:" + minChinese)
  // 7.2 Highest maths score in class 13.
  println("13 班数学最高成绩是:" + math.filter(_._1 == 13).map(_._6).max)

  // 8. How many girls in class 12 have a total score above 150?
  // Sum every girl's scores per name, then keep only totals over the threshold.
  private val girlsOver150InClass12: Long = class12
    .filter(_._4 == "女")
    .map(r => (r._2, r._6))
    .reduceByKey(_ + _)
    .filter(_._2 > 150)
    .count
  println("成绩大于 150 分的 12 班的女生有" + girlsOver150InClass12 + "个")

  // Release Spark resources once every question has been answered.
  sc.stop()
}