Scala编程练习

三张表的样式
在这里插入图片描述

预备工作

通过样例类(case class)的方式,将三张表的数据分别读入三个 List 集合中。

//The student, score and subject tables are used throughout the exercises, so three lists are declared here to hold the data
  //Once loaded, the data can be reused by the code that follows
  var students:List[Students] = _
  var scores:List[Scores] = _
  var subject:List[Subject] = _

  /**
   * Loads the three comma-separated data files into `students`, `scores` and `subject`.
   *
   * Each file is read completely and its handle is closed before the lines are parsed,
   * so a malformed line (for example a non-numeric id) can no longer leave a file open.
   *
   * Throws whatever `Source.fromFile` throws when a file cannot be opened, and
   * `NumberFormatException` / `ArrayIndexOutOfBoundsException` for malformed lines.
   */
  @Before
  def read_file: Unit = {
    // Reads all lines eagerly and always releases the file handle, even if reading fails.
    def readLines(path: String): List[String] = {
      val source: BufferedSource = Source.fromFile(path)
      try source.getLines().toList
      finally source.close()
    }

    // Absolute paths are used because, when running through @Test, the default working
    // directory is the workspace rather than the project, which makes relative paths awkward.

    // students.txt columns: id, name, age, gender, class
    students = readLines("D:\\BigDaTa\\JAVA项目\\ShuJia01\\data\\students.txt").map { line =>
      val fields: Array[String] = line.split(",")
      Students(fields(0).toInt, fields(1), fields(2).toInt, fields(3), fields(4))
    }

    // score.txt columns: student id, subject id, score
    scores = readLines("D:\\BigDaTa\\JAVA项目\\ShuJia01\\data\\score.txt").map { line =>
      val fields: Array[String] = line.split(",")
      Scores(fields(0).toInt, fields(1).toInt, fields(2).toInt)
    }

    // subject.txt columns: subject id, subject name, full marks
    subject = readLines("D:\\BigDaTa\\JAVA项目\\ShuJia01\\data\\subject.txt").map { line =>
      val fields: Array[String] = line.split(",")
      Subject(fields(0).toInt, fields(1), fields(2).toInt)
    }
  }

/** One row of the student table. */
case class Students(
  id: Int,
  name: String,
  age: Int,
  gender: String,
  clazz: String
)

/** One row of the score table: which student got which score in which subject. */
case class Scores(
  id: Int,
  subject_id: Int,
  score: Int
)

/** One row of the subject table; `subject_score` is the subject's full marks. */
case class Subject(
  subject_id: Int,
  subject_name: String,
  subject_score: Int
)

1、统计班级人数

/** Prints one `(class name, number of students)` pair for every class found in `students`. */
def clazz_people_sum: Unit = {
  val headCountByClass: Map[String, Int] = students
    .groupBy(_.clazz)
    .map { case (clazzName, members) => (clazzName, members.size) }

  headCountByClass.foreach(println)
}

2、统计学生的总分

/** Prints one `(student id, total score)` pair for every student id that appears in `scores`. */
def sum_score: Unit = {
  val totalByStudent: Map[Int, Int] = scores
    .groupBy(_.id)
    .map { case (studentId, records) => (studentId, records.map(_.score).sum) }

  totalByStudent.foreach(println)
}

3、统计年级排名前十学生各科的分数 [学号,学生姓名,学生班级,科目名,分数]

/**
 * Prints (student id, name, class, subject name, score) for every score record
 * of the ten students with the highest total score.
 *
 * Rows are printed in the order the students appear in `students`.
 */
def sum_score_top10_stu: Unit = {
  // A groupBy followed by a map that yields pairs produces a Map, which cannot be
  // sorted, so it is turned into a List first.
  val totals: List[(Int, Int)] = scores
    .groupBy(_.id)
    .map { case (studentId, records) => (studentId, records.map(_.score).sum) }
    .toList

  // Negating the total sorts from highest to lowest.
  val top10ids: List[Int] = totals
    .sortBy { case (_, total) => -total }
    .take(10)
    .map(_._1)

  // Student id -> that student's score records. Grouping is needed because the same
  // id occurs once per subject; a plain toMap would keep only one record per id.
  val recordsByStudent: Map[Int, List[Scores]] = scores.groupBy(_.id)

  // Subject id -> subject name.
  val nameBySubject: Map[Int, String] =
    subject.map(sub => (sub.subject_id, sub.subject_name)).toMap

  // One output row per (selected student, score record).
  val rows: List[(Int, String, String, String, Int)] =
    for {
      stu <- students if top10ids.contains(stu.id)
      rec <- recordsByStudent(stu.id)
    } yield (stu.id, stu.name, stu.clazz, nameBySubject(rec.subject_id), rec.score)

  rows.foreach(println)
}

4、统计总分大于年级平均分的学生 [学号,姓名,班级,总分]

/**
 * Prints (student id, name, class, total score) for every student whose total
 * score is strictly above the grade average.
 *
 * The grade average is the mean of the per-student totals. It is computed with
 * floating-point division over the number of students that actually have score
 * records, instead of an integer division by a hard-coded 1000.
 *
 * Throws `NoSuchElementException` if a score record refers to a student id
 * that is missing from `students`.
 */
def sum_score_beyond_avg: Unit = {
  // Student id -> student record.
  val stuMap: Map[Int, Students] = students.map(stu => (stu.id, stu)).toMap

  // Student id -> total score, computed once and reused for the average and the filter.
  val stu_sum: Map[Int, Int] = scores
    .groupBy(_.id)
    .map { case (id, records) => (id, records.map(_.score).sum) }

  // With no score records there is no average to compare against and nothing to print.
  if (stu_sum.nonEmpty) {
    val avg_score: Double = stu_sum.values.map(_.toDouble).sum / stu_sum.size

    stu_sum
      .filter { case (_, total) => total > avg_score }
      .map { case (id, total) =>
        val student: Students = stuMap(id)
        (id, student.name, student.clazz, total)
      }
      .foreach(println)
  }
}

5、统计每科都及格的学生 [学号,姓名,班级,科目,分数]

/**
 * Prints (student id, name, class, subject name, score) for every score record
 * of the students who passed every subject.
 *
 * A record is a pass when the score is at least 60% of that subject's full marks.
 * A student qualifies when the passed records cover every subject in `subject`;
 * the number of subjects is taken from the subject table instead of a hard-coded 6.
 *
 * Throws `NoSuchElementException` if a score record refers to an unknown subject id.
 */
def every_subject_pass: Unit = {
  // Subject id -> (subject name, pass mark). Full marks differ between subjects
  // (some are 100, some 150), so the pass mark is derived per subject.
  val stu_pass_map: Map[Int, (String, Double)] = subject
    .map(sub => (sub.subject_id, (sub.subject_name, sub.subject_score * 0.6)))
    .toMap
  val requiredSubjects: Set[Int] = stu_pass_map.keySet

  // Student id -> that student's score records.
  val recordsByStudent: Map[Int, List[Scores]] = scores.groupBy(_.id)

  // Ids of the students whose passed subjects cover the whole subject table.
  // A Set keeps the membership test below constant-time.
  val stu_id: Set[Int] = recordsByStudent.collect {
    case (id, records)
        if records
          .filter(rec => rec.score >= stu_pass_map(rec.subject_id)._2)
          .map(_.subject_id)
          .toSet == requiredSubjects =>
      id
  }.toSet

  // One output row per (qualifying student, score record), in `students` order.
  val rows: List[(Int, String, String, String, Int)] =
    for {
      stu <- students if stu_id.contains(stu.id)
      rec <- recordsByStudent(stu.id)
    } yield (stu.id, stu.name, stu.clazz, stu_pass_map(rec.subject_id)._1, rec.score)

  rows.foreach(println)
}

6、统计每个班级的前三名 [学号,姓名,班级,分数]

/**
 * Prints (student id, name, class, total score) for the three students with the
 * highest total score in each class.
 */
def clazz_sum_score_top3: Unit = {
  // Student id -> student record, used to attach name and class to each total.
  val stuMap: Map[Int, Students] = students.map(stu => (stu.id, stu)).toMap

  // Student id -> total score over all of that student's records.
  val totalByStudent: Map[Int, Int] = scores
    .groupBy(_.id)
    .map { case (id, records) => (id, records.map(_.score).sum) }

  totalByStudent
    .map { case (id, total) =>
      val owner: Students = stuMap(id)
      (id, owner.name, owner.clazz, total)
    }
    // Group by class, then keep the three highest totals of each group.
    .groupBy(_._3)
    .flatMap { case (_, classmates) =>
      classmates.toList.sortBy(row => -row._4).take(3)
    }
    .foreach(println)
}

7、统计偏科最严重的前100名学生 [学号,姓名,班级,科目,分数]

偏科程度用方差来衡量:先把六门科目的成绩换算成百分制,再计算每个学生各科成绩的方差,方差越大说明偏科越严重。

//什么是偏科最严重(方差),根据六门科目的成绩分别计算方差
  //7、统计偏科最严重的前100名学生  [学号,姓名,班级,科目,分数]
  /**
   * Prints (student id, name, class, subject name, score) for every score record
   * of the 100 students whose scores vary most between subjects.
   *
   * Each score is first rescaled to a 0-100 scale; the spread is then measured as
   * the variance of a student's rescaled scores (mean squared deviation from
   * that student's own average).
   */
  @Test
  def unbalanceTop100Stu: Unit = {
    // Subject id -> full marks of that subject.
    val fullMarks: Map[Int, Int] =
      subject.map(sub => (sub.subject_id, sub.subject_score)).toMap

    // Subjects have different full marks, so every score is converted to a percentage
    // first: e.g. 90 out of 150 becomes 60.
    val percentScores: List[(Int, Double)] = scores.map { rec =>
      (rec.id, rec.score.toDouble / fullMarks(rec.subject_id) * 100)
    }

    // Student id -> that student's (id, percent score) entries.
    val percentByStudent: Map[Int, List[(Int, Double)]] = percentScores.groupBy(_._1)

    // Student id -> average percent score.
    val meanByStudent: Map[Int, Double] = percentByStudent.map { case (id, entries) =>
      (id, entries.map(_._2).sum / entries.size.toDouble)
    }

    // Variance per student, sorted from largest to smallest; keep the first 100 ids.
    val variance_id: List[Int] = percentByStudent
      .map { case (id, entries) =>
        val mean: Double = meanByStudent(id)
        // (score - average)^2 for every subject
        val squaredDeviations: List[Double] =
          entries.map { case (_, percent) => Math.pow(percent - mean, 2) }
        (id, squaredDeviations.sum / entries.size)
      }
      .toList
      .sortBy { case (_, variance) => -variance }
      .take(100)
      .map(_._1)

    // Student id -> that student's score records. Grouping is needed because the same
    // id occurs once per subject.
    val recordsByStudent: Map[Int, List[Scores]] = scores.groupBy(_.id)

    // Subject id -> subject name.
    val nameBySubject: Map[Int, String] =
      subject.map(sub => (sub.subject_id, sub.subject_name)).toMap

    // One output row per (selected student, score record), in `students` order.
    val rows: List[(Int, String, String, String, Int)] =
      for {
        stu <- students if variance_id.contains(stu.id)
        rec <- recordsByStudent(stu.id)
      } yield (stu.id, stu.name, stu.clazz, nameBySubject(rec.subject_id), rec.score)

    rows.foreach(println)
  }

整体代码

import java.util

import org.junit.{Before, Test}

import scala.collection.immutable
import scala.collection.immutable.StringOps
import scala.collection.mutable.ListBuffer
import scala.io.{BufferedSource, Source}


/*
  1.统计班级人数
  2.统计学生总分
 */

/**
 * JUnit exercises over three comma-separated tables: students, scores and subjects.
 *
 * `read_file` is annotated with `@Before` and fills the three lists; every
 * `@Test` method then prints one report computed from them.
 */
class test {
  // The student, score and subject tables are used by every exercise, so they are
  // loaded once per test into these three lists.
  var students: List[Students] = _
  var scores: List[Scores] = _
  var subject: List[Subject] = _

  /**
   * Loads the three data files into `students`, `scores` and `subject`.
   *
   * Each file is read completely and its handle is closed before the lines are
   * parsed, so a malformed line can no longer leave a file open.
   *
   * Throws whatever `Source.fromFile` throws when a file cannot be opened, and
   * `NumberFormatException` / `ArrayIndexOutOfBoundsException` for malformed lines.
   */
  @Before
  def read_file: Unit = {
    // Reads all lines eagerly and always releases the file handle, even if reading fails.
    def readLines(path: String): List[String] = {
      val source: BufferedSource = Source.fromFile(path)
      try source.getLines().toList
      finally source.close()
    }

    // Absolute paths are used because, when running through @Test, the default working
    // directory is the workspace rather than the project, which makes relative paths awkward.

    // students.txt columns: id, name, age, gender, class
    students = readLines("D:\\BigDaTa\\JAVA项目\\ShuJia01\\data\\students.txt").map { line =>
      val fields: Array[String] = line.split(",")
      Students(fields(0).toInt, fields(1), fields(2).toInt, fields(3), fields(4))
    }

    // score.txt columns: student id, subject id, score
    scores = readLines("D:\\BigDaTa\\JAVA项目\\ShuJia01\\data\\score.txt").map { line =>
      val fields: Array[String] = line.split(",")
      Scores(fields(0).toInt, fields(1).toInt, fields(2).toInt)
    }

    // subject.txt columns: subject id, subject name, full marks
    subject = readLines("D:\\BigDaTa\\JAVA项目\\ShuJia01\\data\\subject.txt").map { line =>
      val fields: Array[String] = line.split(",")
      Subject(fields(0).toInt, fields(1), fields(2).toInt)
    }
  }

  /** Total score per student id, summed over all of that student's records in `scores`. */
  private def totalScoreByStudent: Map[Int, Int] =
    scores
      .groupBy(_.id)
      .map { case (id, records) => (id, records.map(_.score).sum) }

  /** Prints the first five rows of each table as a quick check that loading worked. */
  @Test
  def printAll: Unit = {
    students.take(5).foreach(println)
    scores.take(5).foreach(println)
    subject.take(5).foreach(println)
  }

  /** 1. Prints one `(class name, number of students)` pair per class. */
  @Test
  def clazz_people_sum: Unit = {
    students
      .groupBy(_.clazz)
      .map { case (clazzName, members) => (clazzName, members.size) }
      .foreach(println)
  }

  /** 2. Prints one `(student id, total score)` pair per student. */
  @Test
  def sum_score: Unit = {
    totalScoreByStudent.foreach(println)
  }

  /**
   * 3. Prints (student id, name, class, subject name, score) for every score
   * record of the ten students with the highest total score.
   */
  @Test
  def sum_score_top10_stu: Unit = {
    // The totals are a Map, which cannot be sorted, so they become a List first.
    // Negating the total sorts from highest to lowest.
    val top10ids: List[Int] = totalScoreByStudent.toList
      .sortBy { case (_, total) => -total }
      .take(10)
      .map(_._1)

    id_info(top10ids)
  }

  /**
   * 4. Prints (student id, name, class, total score) for every student whose
   * total score is strictly above the grade average.
   *
   * The grade average is the mean of the per-student totals. It is computed with
   * floating-point division over the number of students that actually have
   * score records, instead of an integer division by a hard-coded 1000.
   *
   * Throws `NoSuchElementException` if a score record refers to a student id
   * that is missing from `students`.
   */
  @Test
  def sum_score_beyond_avg: Unit = {
    // Student id -> student record.
    val stuMap: Map[Int, Students] = students.map(stu => (stu.id, stu)).toMap

    val stu_sum: Map[Int, Int] = totalScoreByStudent

    // With no score records there is no average to compare against and nothing to print.
    if (stu_sum.nonEmpty) {
      val avg_score: Double = stu_sum.values.map(_.toDouble).sum / stu_sum.size

      stu_sum
        .filter { case (_, total) => total > avg_score }
        .map { case (id, total) =>
          val student: Students = stuMap(id)
          (id, student.name, student.clazz, total)
        }
        .foreach(println)
    }
  }

  /**
   * 5. Prints (student id, name, class, subject name, score) for every score
   * record of the students who passed every subject.
   *
   * A record is a pass when the score is at least 60% of that subject's full
   * marks. A student qualifies when the passed records cover every subject in
   * `subject`; the subject count comes from the table instead of a hard-coded 6.
   *
   * Throws `NoSuchElementException` if a score record refers to an unknown subject id.
   */
  @Test
  def every_subject_pass: Unit = {
    // Subject id -> pass mark. Full marks differ between subjects (some are 100,
    // some 150), so the pass mark is derived per subject.
    val passMark: Map[Int, Double] =
      subject.map(sub => (sub.subject_id, sub.subject_score * 0.6)).toMap
    val requiredSubjects: Set[Int] = passMark.keySet

    val stu_id: List[Int] = scores
      .groupBy(_.id)
      .collect {
        case (id, records)
            if records
              .filter(rec => rec.score >= passMark(rec.subject_id))
              .map(_.subject_id)
              .toSet == requiredSubjects =>
          id
      }
      .toList

    id_info(stu_id)
  }

  /**
   * 6. Prints (student id, name, class, total score) for the three students with
   * the highest total score in each class.
   */
  @Test
  def clazz_sum_score_top3: Unit = {
    // Student id -> student record, used to attach name and class to each total.
    val stuMap: Map[Int, Students] = students.map(stu => (stu.id, stu)).toMap

    totalScoreByStudent
      .map { case (id, total) =>
        val owner: Students = stuMap(id)
        (id, owner.name, owner.clazz, total)
      }
      // Group by class, then keep the three highest totals of each group.
      .groupBy(_._3)
      .flatMap { case (_, classmates) =>
        classmates.toList.sortBy(row => -row._4).take(3)
      }
      .foreach(println)
  }

  /**
   * 7. Prints (student id, name, class, subject name, score) for every score
   * record of the 100 students whose scores vary most between subjects.
   *
   * Each score is first rescaled to a 0-100 scale; the spread is then measured
   * as the variance of a student's rescaled scores.
   */
  @Test
  def unbalanceTop100Stu: Unit = {
    // Subject id -> full marks of that subject.
    val fullMarks: Map[Int, Int] =
      subject.map(sub => (sub.subject_id, sub.subject_score)).toMap

    // Subjects have different full marks, so every score is converted to a
    // percentage first: e.g. 90 out of 150 becomes 60.
    val percentScores: List[(Int, Double)] = scores.map { rec =>
      (rec.id, rec.score.toDouble / fullMarks(rec.subject_id) * 100)
    }

    // Variance per student, sorted from largest to smallest; keep the first 100 ids.
    val variance_id: List[Int] = percentScores
      .groupBy(_._1)
      .map { case (id, entries) =>
        val percents: List[Double] = entries.map(_._2)
        val mean: Double = percents.sum / percents.size.toDouble
        // mean of (score - average)^2 over the student's subjects
        (id, percents.map(p => Math.pow(p - mean, 2)).sum / percents.size)
      }
      .toList
      .sortBy { case (_, variance) => -variance }
      .take(100)
      .map(_._1)

    id_info(variance_id)
  }

  /**
   * Prints (student id, name, class, subject name, score) for every score record
   * of the students whose id is in `id`.
   *
   * Rows follow the order of `students`; a listed student without any score
   * record contributes no rows.
   *
   * @param id ids of the students to report on
   *
   * Throws `NoSuchElementException` if a score record refers to an unknown subject id.
   */
  def id_info(id: List[Int]): Unit = {
    // A Set keeps the membership test constant-time however many ids are passed.
    val wanted: Set[Int] = id.toSet

    // Student id -> that student's score records. Grouping is needed because the
    // same id occurs once per subject; a plain toMap would keep only one record.
    val recordsByStudent: Map[Int, List[Scores]] = scores.groupBy(_.id)

    // Subject id -> subject name.
    val subMap: Map[Int, String] =
      subject.map(sub => (sub.subject_id, sub.subject_name)).toMap

    val rows: List[(Int, String, String, String, Int)] =
      for {
        stu <- students if wanted.contains(stu.id)
        rec <- recordsByStudent.getOrElse(stu.id, Nil)
      } yield (stu.id, stu.name, stu.clazz, subMap(rec.subject_id), rec.score)

    rows.foreach(println)
  }

}

/** One row of the student table. */
case class Students(
  id: Int,
  name: String,
  age: Int,
  gender: String,
  clazz: String
)

/** One row of the score table: which student got which score in which subject. */
case class Scores(
  id: Int,
  subject_id: Int,
  score: Int
)

/** One row of the subject table; `subject_score` is the subject's full marks. */
case class Subject(
  subject_id: Int,
  subject_name: String,
  subject_score: Int
)


感谢阅读,我是啊帅和和,一位大数据专业大四学生,祝你快乐。

  • 2
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

啊帅和和。

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值