2020.11.15 周末练习(MySQL 50 题的 Spark SQL(Scala)写法)

package nj.zb.kb09.gaoji

import java.util.Properties

import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}

/**
 * Spark SQL (DataFrame API) solutions to the classic "50 MySQL exercises".
 *
 * The program reads the `Course`, `Score`, `Student` and `Teacher` tables over JDBC and
 * prints the answer to each exercise with `show()`.
 *
 * Columns referenced by the queries: `Score(s_id, c_id, s_score)`,
 * `Student(s_id, s_name, s_birth, s_sex)`, `Course(c_id, c_name, t_id)`,
 * `Teacher(t_id, t_name)`.
 */
object Mysql50 {

  import org.apache.spark.sql.Column
  import org.apache.spark.sql.functions._

  /**
   * Opens a local Spark session, loads the four tables over JDBC, runs every exercise
   * and always stops the session afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder().appName("mysql50").master("local[*]").getOrCreate()

    // NOTE(review): host and credentials are hard-coded; move them to configuration
    // before using this anywhere but a throw-away practice database.
    val url = "jdbc:mysql://192.168.237.100:3306/school"
    val user = "root"
    val pwd = "ok"
    val driver = "com.mysql.jdbc.Driver"
    val prop = new Properties()
    prop.setProperty("user", user)
    prop.setProperty("password", pwd)
    prop.setProperty("driver", driver)

    val courseTable = "Course"
    val scoreTable = "Score"
    val studentTable = "Student"
    val teacherTable = "Teacher"

    try {
      val courseTableDF: DataFrame = spark.read.jdbc(url, courseTable, prop)
      val scoreTableDF: DataFrame = spark.read.jdbc(url, scoreTable, prop)
      val studentTableDF: DataFrame = spark.read.jdbc(url, studentTable, prop)
      val teacherTableDF: DataFrame = spark.read.jdbc(url, teacherTable, prop)

      runQueries(courseTableDF, scoreTableDF, studentTableDF, teacherTableDF)
    } finally {
      // Release the local Spark context even when a query fails.
      spark.stop()
    }
  }

  /** Share of rows in the current group that satisfy `cond` (null conditions count as "no"). */
  private def rate(cond: Column): Column = avg(when(cond, 1.0).otherwise(0.0))

  /**
   * Runs exercises 1-50 and prints each result.
   *
   * Course and student ids are compared against numeric literals (`c_id = 1`, `s_id = 1`),
   * exactly as the original where-clauses did; this relies on Spark coercing the column
   * and the literal to a common type. Presumably ids are stored like "01" - confirm
   * against the actual schema.
   */
  private def runQueries(courseTableDF: DataFrame,
                         scoreTableDF: DataFrame,
                         studentTableDF: DataFrame,
                         teacherTableDF: DataFrame): Unit = {

    // ---- frames shared by several exercises -------------------------------------------
    val score01: DataFrame =
      scoreTableDF.where("c_id = 1").select(col("s_id"), col("s_score").as("score_01"))
    val score02: DataFrame =
      scoreTableDF.where("c_id = 2").select(col("s_id"), col("s_score").as("score_02"))
    val avgByStudent: DataFrame =
      scoreTableDF.groupBy("s_id").agg(avg("s_score").as("avg_score"))
    val coursesPerStudent: DataFrame =
      scoreTableDF.groupBy("s_id").agg(count("c_id").as("course_cnt"))
    val zhangsanCourses: DataFrame =
      courseTableDF.join(teacherTableDF, "t_id").where(col("t_name") === "张三").select("c_id")
    val zhangsanStudents: DataFrame =
      scoreTableDF.join(zhangsanCourses, "c_id").select("s_id").distinct()
    // Position of every score inside its course, best score first.
    val rankedByCourse: DataFrame =
      scoreTableDF.selectExpr("*", "row_number() over(partition by c_id order by s_score desc) as rank")
    val courseTotal: Long = courseTableDF.select("c_id").count()
    val coursesOf01: DataFrame = scoreTableDF.where("s_id = 1").select("c_id")

    // 1. Students whose course "01" score is higher than their course "02" score,
    //    with both scores.
    score01.join(score02, "s_id").where("score_01 > score_02").join(studentTableDF, "s_id").show()

    // 2. Students whose course "01" score is lower than their course "02" score,
    //    with both scores.
    score01.join(score02, "s_id").where("score_01 < score_02").join(studentTableDF, "s_id").show()

    // 3. Id, name and average score of students whose average is >= 60.
    avgByStudent.where("avg_score >= 60")
      .join(studentTableDF, "s_id")
      .select("s_id", "s_name", "avg_score")
      .show()

    // 4. Id, name and average score of students whose average is < 60, including
    //    students with no score at all (hence the left join from Student).
    studentTableDF.join(avgByStudent, Seq("s_id"), "left_outer")
      .where("avg_score < 60 or avg_score is null")
      .select("s_id", "s_name", "avg_score")
      .show()

    // 5. Id, name, number of courses taken and total score of ALL students.
    studentTableDF
      .join(
        scoreTableDF.groupBy("s_id").agg(count("c_id").as("course_cnt"), sum("s_score").as("total_score")),
        Seq("s_id"),
        "left_outer")
      .select("s_id", "s_name", "course_cnt", "total_score")
      .na.fill(0L, Seq("course_cnt"))
      .show()

    // 6. Number of teachers whose surname is "李".
    teacherTableDF.where("t_name like '李%'").agg(count(lit(1)).as("teacher_cnt")).show()

    // 7. Students who have taken a course taught by "张三".
    studentTableDF.join(zhangsanStudents, Seq("s_id"), "left_semi").show()

    // 8. Students who have NOT taken any course taught by "张三"
    //    (anti join keeps students without any score as well).
    studentTableDF.join(zhangsanStudents, Seq("s_id"), "left_anti").show()

    // 9. Students who have taken both course "01" and course "02".
    studentTableDF
      .join(score01, Seq("s_id"), "left_semi")
      .join(score02, Seq("s_id"), "left_semi")
      .show()

    // 10. Students who have taken course "01" but not course "02".
    studentTableDF
      .join(score01, Seq("s_id"), "left_semi")
      .join(score02, Seq("s_id"), "left_anti")
      .show()

    // 11. Students who have not taken every course.
    studentTableDF.join(coursesPerStudent, Seq("s_id"), "left_outer")
      .where(col("course_cnt").isNull || (col("course_cnt") =!= courseTotal))
      .show()

    // 12. Students sharing at least one course with student "01".
    val sharesWith01: DataFrame =
      scoreTableDF.join(coursesOf01, "c_id").where("s_id <> 1").select("s_id").distinct()
    studentTableDF.join(sharesWith01, Seq("s_id"), "left_semi").show()

    // 13. Other students whose course set is exactly the same as student "01"'s:
    //     every one of 01's courses is shared AND the student has no extra course.
    val courseCountOf01: Long = coursesOf01.count()
    val sameCourseSet: DataFrame =
      scoreTableDF.join(coursesOf01, "c_id")
        .where("s_id <> 1")
        .groupBy("s_id").agg(count("c_id").as("shared_cnt"))
        .join(coursesPerStudent, "s_id")
        .where((col("shared_cnt") === courseCountOf01) && (col("course_cnt") === courseCountOf01))
        .select("s_id")
    studentTableDF.join(sameCourseSet, Seq("s_id"), "left_semi").show()

    // 14. Names of students who have not taken any course taught by "张三".
    studentTableDF.join(zhangsanStudents, Seq("s_id"), "left_anti").select("s_name").show()

    // 15. Id, name and average score of students failing two or more courses.
    scoreTableDF.where("s_score < 60")
      .groupBy("s_id").agg(count("c_id").as("fail_cnt"))
      .where("fail_cnt >= 2")
      .join(avgByStudent, "s_id")
      .join(studentTableDF, "s_id")
      .select("s_id", "s_name", "avg_score")
      .show()

    // 16. Students with a course "01" score below 60, highest score first.
    scoreTableDF.where("s_score<60 and c_id=1")
      .join(studentTableDF, "s_id")
      .orderBy(desc("s_score"))
      .show()

    // 17. Every score of every student together with the student's average,
    //     ordered by average from high to low (sorting happens after the join so it survives).
    scoreTableDF.join(avgByStudent, "s_id")
      .orderBy(desc("avg_score"), asc("s_id"), asc("c_id"))
      .show()

    // 18. Per course: id, name, max, min, average, pass rate (>= 60), medium rate [70, 80),
    //     good rate [80, 90) and excellent rate (>= 90).
    scoreTableDF.groupBy("c_id")
      .agg(
        max("s_score").as("max_score"),
        min("s_score").as("min_score"),
        avg("s_score").as("avg_score"),
        rate(col("s_score") >= 60).as("pass_rate"),
        rate((col("s_score") >= 70) && (col("s_score") < 80)).as("medium_rate"),
        rate((col("s_score") >= 80) && (col("s_score") < 90)).as("good_rate"),
        rate(col("s_score") >= 90).as("excellent_rate"))
      .join(courseTableDF.select("c_id", "c_name"), "c_id")
      .orderBy("c_id")
      .show()

    // 19. Scores ranked within each course.
    rankedByCourse.join(studentTableDF, "s_id").orderBy("c_id", "rank").show()

    // 20. Total score per student, ranked from highest to lowest.
    scoreTableDF.groupBy("s_id").agg(sum("s_score").as("sum_score"))
      .selectExpr("*", "row_number() over(order by sum_score desc) as rank")
      .show()

    // 21. Average score per course and teacher, from high to low.
    scoreTableDF.groupBy("c_id").agg(avg("s_score").as("avg_score"))
      .join(courseTableDF.join(teacherTableDF, "t_id"), "c_id")
      .orderBy(desc("avg_score"))
      .show()

    // 22. Students placed 2nd and 3rd in each course, with their score.
    rankedByCourse.where(col("rank").between(2, 3))
      .join(studentTableDF, "s_id")
      .orderBy("c_id", "rank")
      .show()

    // 23. Per course, the number and share of scores in each band:
    //     1 = [0, 60), 2 = [60, 70), 3 = [70, 85), 4 = [85, 100].
    val bandDF: DataFrame = scoreTableDF
      .where(col("s_score").isNotNull)
      .select(
        col("c_id"),
        when(col("s_score") < 60, 1)
          .when(col("s_score") < 70, 2)
          .when(col("s_score") < 85, 3)
          .otherwise(4)
          .as("rank"))
    val bandTotals: DataFrame = bandDF.groupBy("c_id").count().withColumnRenamed("count", "total")
    bandDF.groupBy("c_id", "rank").count()
      .join(bandTotals, "c_id")
      .withColumn("r", col("count") / col("total"))
      .orderBy("c_id", "rank")
      .show()

    // 24. Average score per student and its rank (highest average first).
    avgByStudent.selectExpr("*", "row_number() over(order by avg_score desc) as rank").show()

    // 25. Top three scores of each course.
    rankedByCourse.where("rank <= 3").orderBy("c_id", "rank").show()

    // 26. Number of students enrolled in each course.
    scoreTableDF.groupBy("c_id").count().show()

    // 27. Id and name of students who take exactly two courses.
    coursesPerStudent.where("course_cnt = 2")
      .join(studentTableDF, "s_id")
      .select("s_id", "s_name")
      .show()

    // 28. Number of male and female students.
    studentTableDF.groupBy("s_sex").count().show()

    // 29. Students whose name contains "风".
    studentTableDF.where("s_name like \"%风%\"").show()

    // 30. Names shared by more than one student, with the number of students per name.
    studentTableDF.groupBy("s_name").count().where("count > 1").show()

    // 31. Students born in 1990.
    studentTableDF.where("year(s_birth)=1990").show()

    // 32. Average score per course, ordered by average descending, then course id ascending.
    scoreTableDF.groupBy("c_id").avg("s_score").orderBy(desc("avg(s_score)"), asc("c_id")).show()

    // 33. Id, name and average score of students whose average is >= 85.
    avgByStudent.where("avg_score >= 85")
      .join(studentTableDF, "s_id")
      .select("s_id", "s_name", "avg_score")
      .show()

    // 34. Name and score of students scoring below 60 in the course named "数学".
    scoreTableDF.join(courseTableDF, "c_id")
      .where("s_score < 60 and c_name = \"数学\"")
      .join(studentTableDF, "s_id")
      .select("s_name", "s_score")
      .show()

    // 35. Courses and scores of all students.
    scoreTableDF.join(studentTableDF, "s_id").join(courseTableDF, "c_id").show()

    // 36. Name, course name and score for every score above 70.
    scoreTableDF.where("s_score>70")
      .join(studentTableDF, "s_id")
      .join(courseTableDF, "c_id")
      .select("s_name", "c_name", "s_score")
      .show()

    // 37. Failed courses (score below 60).
    scoreTableDF.where("s_score<60").join(studentTableDF, "s_id").show()

    // 38. Id and name of students scoring 80 or more in course "01".
    scoreTableDF.where("c_id=1 and s_score>=80")
      .join(studentTableDF, "s_id")
      .select("s_id", "s_name")
      .show()

    // 39. Number of students per course.
    scoreTableDF.groupBy("c_id").count().show()

    // 40. The best-scoring student (and score) among those taking a course taught by "张三".
    scoreTableDF.join(zhangsanCourses, "c_id")
      .orderBy(desc("s_score"))
      .limit(1)
      .join(studentTableDF, "s_id")
      .show()

    // 41. Student id, course id and score of scores that also occur in a different course.
    scoreTableDF.as("s1")
      .join(
        scoreTableDF.as("s2"),
        (col("s1.s_score") === col("s2.s_score")) && (col("s1.c_id") =!= col("s2.c_id")))
      .select("s1.s_id", "s1.c_id", "s1.s_score")
      .distinct()
      .show()

    // 42. Top two scores of each course.
    rankedByCourse.where("rank <= 2").join(studentTableDF, "s_id").orderBy("c_id", "rank").show()

    // 43. Course id and enrolment for courses with more than 5 students,
    //     ordered by enrolment descending, then course id ascending.
    scoreTableDF.groupBy("c_id").agg(count("s_id").as("cnt"))
      .where("cnt > 5")
      .orderBy(desc("cnt"), asc("c_id"))
      .show()

    // 44. Ids of students who take at least two courses.
    coursesPerStudent.where("course_cnt >= 2").select("s_id").show()

    // 45. Students who take every course.
    coursesPerStudent.where(col("course_cnt") === courseTotal)
      .join(studentTableDF, "s_id")
      .show()

    // 46. Age of each student (difference between the current year and the birth year).
    studentTableDF.selectExpr("*", "cast( date_format(current_date(),'yyyy') as Int  )  - cast(  date_format( s_birth,'yyyy' ) as Int )  age").show()

    // Birthday moved into the current calendar year, and the Monday that starts next week.
    // NOTE: a week straddling New Year only matches birthdays falling in the current year.
    val birthdayThisYear =
      "cast( concat_ws('-',date_format(current_date(),'yyyy'),date_format(s_birth,'MM'),date_format(s_birth,'dd') ) as date )"
    val nextMonday = "next_day(current_date(),'MON')"

    // 47. Students whose birthday falls in the current week (Monday through Sunday).
    studentTableDF
      .where(s"$birthdayThisYear between date_sub($nextMonday,7) and date_sub($nextMonday,1)")
      .show()

    // 48. Students whose birthday falls in next week (next Monday through the following Sunday).
    studentTableDF
      .where(s"$birthdayThisYear between $nextMonday and date_add($nextMonday,6)")
      .show()

    // 49. Students whose birthday is in the current month.
    studentTableDF.where("month(s_birth) = month( current_date() )").show()

    // 50. Students whose birthday is in the next month (add_months wraps December to January).
    studentTableDF.where("month(s_birth) = month( add_months(current_date(), 1) )").show()
  }
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值