原数据参考经典50道SQL练习题_sql50题-CSDN博客 ,请自行建表插值
一:连接数据库
val spark: SparkSession = SparkSession.builder().master("local[*]").appName("sparksql50").getOrCreate()
val sct: SparkContext = spark.sparkContext
var url = "jdbc:mysql://192.168.10.129:3306/sparksql50"
var user = "root"
var pwd = "123456"
var driver = "com.mysql.cj.jdbc.Driver"
val prop = new Properties()
prop.setProperty("user",user)
prop.setProperty("password",pwd)
prop.setProperty("driver",driver)
val course: DataFrame = spark.read.jdbc(url,"course",prop)
val sc: DataFrame = spark.read.jdbc(url,"sc",prop)
val student: DataFrame = spark.read.jdbc(url,"student",prop)
val teacher: DataFrame = spark.read.jdbc(url,"teacher",prop)
二:题目
1.查询" 01 “课程比” 02 “课程成绩高的学生的信息及课程分数
sc.as("s1").join(sc.as("s2"),"SID")
.where("s1.CID = 01 and s2.CID = 02 and s1.score > s2.score")
.join(student,"SID").show()
2.查询"01"课程比"02"课程成绩低的学生的信息及课程分数
sc.as("s1").join(sc.as("s2"),"SID")
.where("s1.CID = 01 and s2.CID = 02 and s1.score < s2.score")
.join(student,"SID").show()
3.查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩
sc.as("s1")
.groupBy("sid")
.avg("s1.score")
.where("avg(score)>=60")
.join(student,"sid")
.select("sid","sname","avg(score)")
.show()
4、查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩
注意,平均成绩可能为空值
sc.as("s1")
.groupBy("sid")
.avg("score")
.where("avg(score)<60 or avg(score) == null")
.join(student,"sid")
.select("sid","sname","avg(score)")
.show()
5.查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩
student
.join(sc.groupBy("sid").sum("score"),"sid")
.join(sc.groupBy("sid").count(),"sid")
.show()
6.查询"李"姓老师的数量
val i: Long = teacher.where("tname like '李%'").count()
println(i)
7.查询学过"李四"老师授课的同学的信息
sc
.join(course,"cid")
.join(teacher.where("tname = '李四'"),"tid")
.join(student,"sid")
.select("sname","sage","ssex")
.show()
8.查询没有学过"李四"老师授课的同学的信息
sc.join(teacher.filter("tname == '李四'").join(course, Seq("tid")), Seq("cid"))
.join(student, Seq("sid"), "right")
.filter(x=>x(4) == null)
.select("sname","sage","ssex").show()
9.查询学过编号为"01"并且也学过编号为"02"的课程的同学的信息
student.join(sc,"sid").where("cid == 01")
.join(student.join(sc,"sid").where("cid == 02"),"sid")
.show()