目录
三张表的样式
预备工作
通过样例类(case class)将三张表的数据分别读入三个 List 集合中
// The student, score and subject tables are used by every statistic below,
// so each one is loaded once into a list and kept as shared state.
// They start out empty (instead of null) until read_file fills them in.
var students: List[Students] = Nil
var scores: List[Scores] = Nil
var subject: List[Subject] = Nil
/**
 * Loads the three comma-separated data files into `students`, `scores` and `subject`.
 *
 * Expected columns, as read by the parsing code below:
 *  - students.txt: id, name, age, gender, class
 *  - score.txt:    student id, subject id, score
 *  - subject.txt:  subject id, subject name, full marks
 *
 * Throws if a file cannot be opened, a row has too few columns, or a numeric
 * column is not an integer.
 */
@Before
def read_file: Unit = {
  // When run through @Test the default working directory is the module, not the
  // whole project, so project-relative paths are unreliable; absolute paths are used.
  students = readRows("D:\\BigDaTa\\JAVA项目\\ShuJia01\\data\\students.txt").map { cols =>
    Students(cols(0).toInt, cols(1), cols(2).toInt, cols(3), cols(4))
  }
  scores = readRows("D:\\BigDaTa\\JAVA项目\\ShuJia01\\data\\score.txt").map { cols =>
    Scores(cols(0).toInt, cols(1).toInt, cols(2).toInt)
  }
  subject = readRows("D:\\BigDaTa\\JAVA项目\\ShuJia01\\data\\subject.txt").map { cols =>
    Subject(cols(0).toInt, cols(1), cols(2).toInt)
  }
}

/** Reads `path` eagerly, skips blank lines, splits each line on "," and always closes the file. */
private def readRows(path: String): List[Array[String]] = {
  val source: BufferedSource = Source.fromFile(path)
  try {
    // getLines() is a lazy iterator, so it is materialised before the source is closed.
    source.getLines().toList.filter(_.trim.nonEmpty).map(_.split(","))
  } finally {
    source.close()
  }
}
/** One row of the student table. */
case class Students(
  id: Int,        // student id
  name: String,
  age: Int,
  gender: String,
  clazz: String   // name of the class the student belongs to
)

/** One row of the score table: the score one student got in one subject. */
case class Scores(
  id: Int,         // student id (matches Students.id)
  subject_id: Int, // matches Subject.subject_id
  score: Int
)

/** One row of the subject table. */
case class Subject(
  subject_id: Int,
  subject_name: String,
  subject_score: Int // full marks of the subject
)
1、统计班级人数
/** Prints one `(class name, head count)` pair per class. */
def clazz_people_sum: Unit = {
  val headCounts = students
    .groupBy(_.clazz)
    .map { case (className, members) => (className, members.size) }
  headCounts.foreach(println)
}
2、统计学生的总分
/** Prints one `(student id, total score)` pair per student. */
def sum_score: Unit = {
  val totals = scores
    .groupBy(_.id)
    .map { case (studentId, records) =>
      // Add up every subject score recorded for this student.
      (studentId, records.map(_.score).sum)
    }
  totals.foreach(println)
}
3、统计年级排名前十学生各科的分数 [学号,学生姓名,学生班级,科目名,分数]
/**
 * Prints every subject score of the ten students with the highest total score,
 * one `(student id, name, class, subject name, score)` tuple per line.
 *
 * Rows are printed in the order of the `students` list, not in ranking order.
 * Throws NoSuchElementException if a score refers to an unknown subject id.
 */
def sum_score_top10_stu: Unit = {
  // groupBy + map to pairs yields a Map, which cannot be sorted, hence the toList.
  val top10ids: Set[Int] = scores
    .groupBy(_.id)
    .map { case (id, records) => (id, records.map(_.score).sum) }
    .toList
    .sortBy { case (_, total) => -total } // negated key => descending order
    .take(10)
    .map(_._1)
    .toSet // constant-time membership test in the filter below

  // Student id -> all score records of that student (ids repeat, so group instead of toMap).
  val scoresByStudent: Map[Int, List[Scores]] = scores.groupBy(_.id)
  // Subject id -> subject name.
  val subjectNames: Map[Int, String] =
    subject.map(sub => sub.subject_id -> sub.subject_name).toMap

  students
    .filter(stu => top10ids.contains(stu.id))
    // One student has several score records; flatMap emits one row per record.
    .flatMap { stu =>
      scoresByStudent(stu.id).map { record =>
        (stu.id, stu.name, stu.clazz, subjectNames(record.subject_id), record.score)
      }
    }
    .foreach(println)
}
4、统计总分大于年级平均分的学生 [学号,姓名,班级,总分]
/**
 * Prints `(student id, name, class, total score)` for every student whose total
 * score is strictly above the grade average.
 *
 * The grade average is the mean of the per-student totals, taken over the
 * students that actually appear in `scores`. Prints nothing when there are no scores.
 * Throws NoSuchElementException if a score refers to an unknown student id.
 */
def sum_score_beyond_avg: Unit = {
  // Student id -> student record.
  val stuMap: Map[Int, Students] = students.map(stu => stu.id -> stu).toMap

  // Student id -> total score; computed once and reused for the average and the filter.
  val totals: Map[Int, Int] = scores
    .groupBy(_.id)
    .map { case (id, records) => (id, records.map(_.score).sum) }

  if (totals.nonEmpty) {
    // Divide as Double by the real number of students instead of a fixed 1000.
    val avg_score: Double = totals.values.sum.toDouble / totals.size

    totals
      .filter { case (_, total) => total > avg_score }
      .map { case (id, total) =>
        val student: Students = stuMap(id)
        (id, student.name, student.clazz, total)
      }
      .foreach(println)
  }
}
5、统计每科都及格的学生 [学号,姓名,班级,科目,分数]
/**
 * Prints every subject score of the students who passed all of their subjects,
 * one `(student id, name, class, subject name, score)` tuple per line.
 *
 * A score passes when it reaches 60% of that subject's full marks. A student
 * qualifies when none of their own score records is below the pass mark, so the
 * check does not depend on a fixed number of subjects per student.
 * Throws NoSuchElementException if a score refers to an unknown subject id.
 */
def every_subject_pass: Unit = {
  // Subjects have different full marks (e.g. 150 vs 100), so each gets its own pass mark.
  val passMarks: Map[Int, Double] =
    subject.map(sub => sub.subject_id -> sub.subject_score * 0.6).toMap

  // Student id -> all score records of that student.
  val scoresByStudent: Map[Int, List[Scores]] = scores.groupBy(_.id)

  // Ids of the students with no failing record; everything else is looked up from these.
  val stu_id: Set[Int] = scoresByStudent.collect {
    case (id, records) if records.forall(rec => rec.score >= passMarks(rec.subject_id)) => id
  }.toSet

  // Subject id -> subject name.
  val subjectNames: Map[Int, String] =
    subject.map(sub => sub.subject_id -> sub.subject_name).toMap

  students
    .filter(stu => stu_id.contains(stu.id))
    // One student has several score records; flatMap emits one row per record.
    .flatMap { stu =>
      scoresByStudent(stu.id).map { record =>
        (stu.id, stu.name, stu.clazz, subjectNames(record.subject_id), record.score)
      }
    }
    .foreach(println)
}
6、统计每个班级的前三名 [学号,姓名,班级,分数]
/**
 * Prints `(student id, name, class, total score)` for the three students with
 * the highest total score in each class.
 *
 * Throws NoSuchElementException if a score refers to an unknown student id.
 */
def clazz_sum_score_top3: Unit = {
  // Student id -> student record, used to attach name and class to each total.
  val stuMap: Map[Int, Students] = students.map(stu => stu.id -> stu).toMap

  // Student id -> total score.
  val stu_score: Map[Int, Int] = scores
    .groupBy(_.id)
    .map { case (id, records) => (id, records.map(_.score).sum) }

  stu_score
    .map { case (id, total) =>
      val student: Students = stuMap(id)
      (id, student.name, student.clazz, total)
    }
    // Group by class (third tuple field) and keep the best three of each group.
    .groupBy(_._3)
    .flatMap { case (_, classmates) =>
      classmates.toList
        .sortBy(row => -row._4) // negated total => descending order
        .take(3)
    }
    .foreach(println)
}
7、统计偏科最严重的前100名学生 [学号,姓名,班级,科目,分数]
偏科程度用方差来衡量:先把六门科目的成绩按各科满分换算成百分制,再对每个学生计算这些成绩的方差;方差越大,偏科越严重。
/**
 * Statistic 7: prints every subject score of the 100 students whose scores are
 * the most unbalanced, one `(student id, name, class, subject name, score)` tuple per line.
 *
 * "Unbalanced" is measured by the (population) variance of a student's scores
 * after each score has been converted to a percentage of its subject's full marks.
 * Rows are printed in the order of the `students` list.
 * Throws NoSuchElementException if a score refers to an unknown subject id.
 */
@Test
def unbalanceTop100Stu: Unit = {
  // Subject id -> full marks.
  val sub_score_map: Map[Int, Int] =
    subject.map(sub => sub.subject_id -> sub.subject_score).toMap

  // Full marks differ per subject, so normalise first:
  // e.g. 90 out of 150 becomes 60 on the percentage scale.
  val id_new_score: List[(Int, Double)] = scores.map { sco =>
    (sco.id, sco.score.toDouble / sub_score_map(sco.subject_id) * 100)
  }

  // Per student: mean of the normalised scores, then mean squared deviation.
  val variance_id: Set[Int] = id_new_score
    .groupBy(_._1)
    .map { case (id, rows) =>
      val marks: List[Double] = rows.map(_._2)
      val avg_score: Double = marks.sum / marks.size.toDouble
      val variance: Double = marks.map(mark => Math.pow(mark - avg_score, 2)).sum / marks.size
      (id, variance)
    }
    .toList
    .sortBy { case (_, variance) => -variance } // largest variance first
    .take(100)
    .map(_._1)
    .toSet // constant-time membership test in the filter below

  // Student id -> all score records of that student.
  val scoresByStudent: Map[Int, List[Scores]] = scores.groupBy(_.id)
  // Subject id -> subject name.
  val subjectNames: Map[Int, String] =
    subject.map(sub => sub.subject_id -> sub.subject_name).toMap

  students
    .filter(stu => variance_id.contains(stu.id))
    // One student has several score records; flatMap emits one row per record.
    .flatMap { stu =>
      scoresByStudent(stu.id).map { record =>
        (stu.id, stu.name, stu.clazz, subjectNames(record.subject_id), record.score)
      }
    }
    .foreach(println)
}
整体代码
import java.util
import org.junit.{Before, Test}
import scala.collection.immutable
import scala.collection.immutable.StringOps
import scala.collection.mutable.ListBuffer
import scala.io.{BufferedSource, Source}
/**
 * JUnit 4 exercises over three comma-separated tables (students, scores, subjects).
 *
 * Every `@Test` prints its result to standard output:
 *   1. head count per class
 *   2. total score per student
 *   3. per-subject scores of the ten students with the highest totals
 *   4. students whose total is above the grade average
 *   5. students who passed every subject
 *   6. top three students of each class
 *   7. the 100 students with the most unbalanced scores
 */
class test {
  // The three tables are used by every statistic, so they are loaded once per
  // test into these lists. They are empty (not null) until read_file runs.
  var students: List[Students] = Nil
  var scores: List[Scores] = Nil
  var subject: List[Subject] = Nil

  /**
   * Loads the three data files into `students`, `scores` and `subject`.
   *
   * Expected columns, as read by the parsing code below:
   *  - students.txt: id, name, age, gender, class
   *  - score.txt:    student id, subject id, score
   *  - subject.txt:  subject id, subject name, full marks
   *
   * Throws if a file cannot be opened, a row has too few columns, or a numeric
   * column is not an integer.
   */
  @Before
  def read_file: Unit = {
    // When run through @Test the default working directory is the module, not the
    // whole project, so project-relative paths are unreliable; absolute paths are used.
    students = readRows("D:\\BigDaTa\\JAVA项目\\ShuJia01\\data\\students.txt").map { cols =>
      Students(cols(0).toInt, cols(1), cols(2).toInt, cols(3), cols(4))
    }
    scores = readRows("D:\\BigDaTa\\JAVA项目\\ShuJia01\\data\\score.txt").map { cols =>
      Scores(cols(0).toInt, cols(1).toInt, cols(2).toInt)
    }
    subject = readRows("D:\\BigDaTa\\JAVA项目\\ShuJia01\\data\\subject.txt").map { cols =>
      Subject(cols(0).toInt, cols(1), cols(2).toInt)
    }
  }

  /** Reads `path` eagerly, skips blank lines, splits each line on "," and always closes the file. */
  private def readRows(path: String): List[Array[String]] = {
    val source: BufferedSource = Source.fromFile(path)
    try {
      // getLines() is a lazy iterator, so it is materialised before the source is closed.
      source.getLines().toList.filter(_.trim.nonEmpty).map(_.split(","))
    } finally {
      source.close()
    }
  }

  /** Student id -> sum of all scores recorded for that student. */
  private def totalScores: Map[Int, Int] =
    scores
      .groupBy(_.id)
      .map { case (id, records) => (id, records.map(_.score).sum) }

  /** Prints the first five rows of each table as a quick sanity check of the loading step. */
  @Test
  def printAll: Unit = {
    students.take(5).foreach(println)
    scores.take(5).foreach(println)
    subject.take(5).foreach(println)
  }

  /** 1. Prints one `(class name, head count)` pair per class. */
  @Test
  def clazz_people_sum: Unit = {
    students
      .groupBy(_.clazz)
      .map { case (className, members) => (className, members.size) }
      .foreach(println)
  }

  /** 2. Prints one `(student id, total score)` pair per student. */
  @Test
  def sum_score: Unit = {
    totalScores.foreach(println)
  }

  /**
   * 3. Prints every subject score of the ten students with the highest total score
   * as `(student id, name, class, subject name, score)`.
   * Rows are printed in the order of the `students` list, not in ranking order.
   */
  @Test
  def sum_score_top10_stu: Unit = {
    // A Map cannot be sorted, so the totals go through a List first.
    val top10ids: List[Int] = totalScores.toList
      .sortBy { case (_, total) => -total } // negated key => descending order
      .take(10)
      .map(_._1)
    id_info(top10ids)
  }

  /**
   * 4. Prints `(student id, name, class, total score)` for every student whose total
   * score is strictly above the grade average.
   *
   * The grade average is the mean of the per-student totals, taken over the
   * students that actually appear in `scores`. Prints nothing when there are no scores.
   * Throws NoSuchElementException if a score refers to an unknown student id.
   */
  @Test
  def sum_score_beyond_avg: Unit = {
    // Student id -> student record.
    val stuMap: Map[Int, Students] = students.map(stu => stu.id -> stu).toMap
    val totals: Map[Int, Int] = totalScores

    if (totals.nonEmpty) {
      // Divide as Double by the real number of students instead of a fixed 1000.
      val avg_score: Double = totals.values.sum.toDouble / totals.size

      totals
        .filter { case (_, total) => total > avg_score }
        .map { case (id, total) =>
          val student: Students = stuMap(id)
          (id, student.name, student.clazz, total)
        }
        .foreach(println)
    }
  }

  /**
   * 5. Prints every subject score of the students who passed all of their subjects
   * as `(student id, name, class, subject name, score)`.
   *
   * A score passes when it reaches 60% of that subject's full marks. A student
   * qualifies when none of their own score records is below the pass mark, so the
   * check does not depend on a fixed number of subjects per student.
   * Throws NoSuchElementException if a score refers to an unknown subject id.
   */
  @Test
  def every_subject_pass: Unit = {
    // Subjects have different full marks (e.g. 150 vs 100), so each gets its own pass mark.
    val passMarks: Map[Int, Double] =
      subject.map(sub => sub.subject_id -> sub.subject_score * 0.6).toMap

    val stu_id: List[Int] = scores
      .groupBy(_.id)
      .collect {
        case (id, records) if records.forall(rec => rec.score >= passMarks(rec.subject_id)) => id
      }
      .toList
    id_info(stu_id)
  }

  /**
   * 6. Prints `(student id, name, class, total score)` for the three students with
   * the highest total score in each class.
   * Throws NoSuchElementException if a score refers to an unknown student id.
   */
  @Test
  def clazz_sum_score_top3: Unit = {
    // Student id -> student record, used to attach name and class to each total.
    val stuMap: Map[Int, Students] = students.map(stu => stu.id -> stu).toMap

    totalScores
      .map { case (id, total) =>
        val student: Students = stuMap(id)
        (id, student.name, student.clazz, total)
      }
      // Group by class (third tuple field) and keep the best three of each group.
      .groupBy(_._3)
      .flatMap { case (_, classmates) =>
        classmates.toList
          .sortBy(row => -row._4) // negated total => descending order
          .take(3)
      }
      .foreach(println)
  }

  /**
   * 7. Prints every subject score of the 100 students whose scores are the most
   * unbalanced, as `(student id, name, class, subject name, score)`.
   *
   * "Unbalanced" is measured by the (population) variance of a student's scores
   * after each score has been converted to a percentage of its subject's full marks.
   * Throws NoSuchElementException if a score refers to an unknown subject id.
   */
  @Test
  def unbalanceTop100Stu: Unit = {
    // Subject id -> full marks.
    val sub_score_map: Map[Int, Int] =
      subject.map(sub => sub.subject_id -> sub.subject_score).toMap

    // Full marks differ per subject, so normalise first:
    // e.g. 90 out of 150 becomes 60 on the percentage scale.
    val id_new_score: List[(Int, Double)] = scores.map { sco =>
      (sco.id, sco.score.toDouble / sub_score_map(sco.subject_id) * 100)
    }

    // Per student: mean of the normalised scores, then mean squared deviation.
    val variance_id: List[Int] = id_new_score
      .groupBy(_._1)
      .map { case (id, rows) =>
        val marks: List[Double] = rows.map(_._2)
        val avg_score: Double = marks.sum / marks.size.toDouble
        val variance: Double = marks.map(mark => Math.pow(mark - avg_score, 2)).sum / marks.size
        (id, variance)
      }
      .toList
      .sortBy { case (_, variance) => -variance } // largest variance first
      .take(100)
      .map(_._1)
    id_info(variance_id)
  }

  /**
   * Prints `(student id, name, class, subject name, score)` for every score record
   * of the students whose id is in `id`, in the order of the `students` list.
   *
   * A listed student without any score record produces no output.
   *
   * @param id ids of the students to print
   * @throws NoSuchElementException if a score refers to an unknown subject id
   */
  def id_info(id: List[Int]): Unit = {
    val wanted: Set[Int] = id.toSet // constant-time membership test in the filter below
    // Student id -> all score records of that student (ids repeat, so group instead of toMap).
    val scoresByStudent: Map[Int, List[Scores]] = scores.groupBy(_.id)
    // Subject id -> subject name.
    val subjectNames: Map[Int, String] =
      subject.map(sub => sub.subject_id -> sub.subject_name).toMap

    students
      .filter(stu => wanted.contains(stu.id))
      // One student has several score records; flatMap emits one row per record.
      .flatMap { stu =>
        scoresByStudent.getOrElse(stu.id, Nil).map { record =>
          (stu.id, stu.name, stu.clazz, subjectNames(record.subject_id), record.score)
        }
      }
      .foreach(println)
  }
}
/** One row of the student table. */
case class Students(
  id: Int,        // student id
  name: String,
  age: Int,
  gender: String,
  clazz: String   // name of the class the student belongs to
)

/** One row of the score table: the score one student got in one subject. */
case class Scores(
  id: Int,         // student id (matches Students.id)
  subject_id: Int, // matches Subject.subject_id
  score: Int
)

/** One row of the subject table. */
case class Subject(
  subject_id: Int,
  subject_name: String,
  subject_score: Int // full marks of the subject
)
感谢阅读,我是啊帅和和,一位大数据专业大四学生,祝你快乐。