Lab Exercise 1
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.sql.functions.col

object Spark_sx1 {
  Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName(this.getClass.getSimpleName)
      .master("local[*]")
      .config("dfs.client.use.datanode.hostname", "true")
      .enableHiveSupport()
      .getOrCreate()

    // Pick the database to work with
    val sq = "show databases"
    spark.sql(sq).show()
    val sql = "use student"
    spark.sql(sql)
    val sql2 = "show tables"
    spark.sql(sql2).show()

    // Load the four Hive tables as DataFrames
    val score = spark.sql("select * from student.score")
    val course = spark.sql("select * from student.course")
    val student = spark.sql("select * from student.student")
    val teacher = spark.sql("select * from student.teacher")

    // val stu1 = spark.sql("select teacher.t_id,course.c_id,avg(score.s_score) from teacher,course,score where teacher.t_id=course.t_id and course.c_id=score.c_id").show()

    // Average score per teacher and course
    val stu2 = score.join(course, "c_id").join(teacher, "t_id")
    val stu3 = stu2.select("t_id", "c_id", "s_score")
    val stu4 = stu3.groupBy("t_id", "c_id").avg("s_score")
    stu4.show()

    // Cast the average to an integer and drop the original aggregate column
    val stu5 = stu4.withColumn("avg", col("avg(s_score)").cast("int")).drop("avg(s_score)")
    stu5.show()

    // Write the result back into Hive
    stu5.write.format("hive").mode(SaveMode.Overwrite).saveAsTable("student.teacher_courseAge")
  }
}
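The commented-out pure-SQL variant near the top of main is missing a GROUP BY, so it would not run as written. A minimal SQL sketch of the same teacher/course average, meant to run inside the same main after the SparkSession above is created (the alias avg_score is just an illustrative name):

    // Pure-SQL sketch of the average score per teacher and course
    val avgBySql = spark.sql(
      """
        |select c.t_id,
        |       c.c_id,
        |       cast(avg(s.s_score) as int) as avg_score
        |from student.score s
        |join student.course c on s.c_id = c.c_id
        |join student.teacher t on c.t_id = t.t_id
        |group by c.t_id, c.c_id
      """.stripMargin)
    avgBySql.show()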
Lab Exercise 2
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, desc}
import org.apache.spark.sql.types.DataTypes

object Spark_sxxxx2 {
  Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName(this.getClass.getSimpleName)
      .master("local[*]")
      .config("dfs.client.use.datanode.hostname", "true")
      .enableHiveSupport()
      .getOrCreate()
    val sc = spark.sparkContext
    sc.setLogLevel("ERROR")

    // (2) Read the data
    val score = spark.sql("select * from school.score")
    val course = spark.sql("select * from school.course")
    val student = spark.sql("select * from school.student")

    // (3) Student ID, name, number of courses selected, and total score across all courses
    val s1 = student.select("s_id", "s_name").join(score, "s_id")
    s1.show()
    // Student ID, name, number of courses selected
    val s2 = s1.groupBy("s_id").count().orderBy("s_id")
    // s2.show()
    // Student ID, total score across all courses
    val s3 = s1.groupBy("s_id").sum("s_score").orderBy("s_id")
    // s3.show()
    val s4 = s1.select("s_id", "s_name").join(s2, "s_id").join(s3, "s_id").distinct()
    val s5 = s4.withColumnRenamed("sum(s_score)", "sums").orderBy("s_id")
    s5.show()

    // (4) Name and score of students whose score in the course "数学" (Math) is below 60
    val s6 = s1.join(course, "c_id")
    // s6.show()
    val s7 = s6.where("c_name='数学' and s_score < 60").select("s_name", "s_score")
    s7.show()

    // (5) Each student's average score and rank
    val s8 = s1.groupBy("s_id").avg("s_score").orderBy(desc("avg(s_score)"))
    // val s9 = s8.withColumn("avg", col("avg(s_score)").cast("int")).drop("avg(s_score)")
    val s9 = s8.withColumn("avg", col("avg(s_score)").cast(DataTypes.IntegerType)).drop("avg(s_score)")
    // s9.show()
    // Back-quote the alias so the reserved word "order" parses as a column name
    val s10 = s9.selectExpr("s_id", "avg", "rank() over (order by avg desc) as `order`")
    s10.show()
  }
}
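Step (5) can also be written with the DataFrame Window API instead of a window expression inside selectExpr. A minimal sketch, meant to run inside the same main after s9 is built (col and desc are already imported in this object; the column name rnk is an illustrative choice):

    import org.apache.spark.sql.expressions.Window
    import org.apache.spark.sql.functions.rank

    // Rank students by average score, highest first
    val w = Window.orderBy(desc("avg"))
    val ranked = s9.select(col("s_id"), col("avg"), rank().over(w).as("rnk"))
    ranked.show()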
Table creation statements
create table student(
s_id double,
s_name string,
s_birth string,
s_sex string
)
row format delimited fields terminated by ',';
Cluster mode: load data inpath '/data/student.txt' overwrite into table student;
Local mode: load data local inpath '/root/student.txt' overwrite into table student;
create table score(
s_id double,
c_id double,
s_score double
)
row format delimited fields terminated by ',';
Cluster mode: load data inpath '/data/score.txt' overwrite into table score;
Local mode: load data local inpath '/root/score.txt' overwrite into table score;
create table course(
c_id double,
c_name string,
t_id double
)
row format delimited fields terminated by ',';
Cluster mode: load data inpath '/data/course.txt' overwrite into table course;
Local mode: load data local inpath '/root/course.txt' overwrite into table course;
create table teacher(
t_id double,
t_name string
)
row format delimited fields terminated by ',';
Cluster mode: load data inpath '/data/teacher.txt' overwrite into table teacher;
Local mode: load data local inpath '/root/teacher.txt' overwrite into table teacher;
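The statements above are written for the Hive CLI. If you prefer to create and load a table from the same Spark job, they can also be issued through spark.sql on a Hive-enabled SparkSession (like the ones built in the two labs); a minimal sketch for the student table, assuming the local file path used above:

    // Sketch: create and load the student table from a Hive-enabled SparkSession;
    // the path and database are assumptions, adjust to your environment.
    spark.sql(
      """
        |create table if not exists student(
        |  s_id double,
        |  s_name string,
        |  s_birth string,
        |  s_sex string
        |)
        |row format delimited fields terminated by ','
      """.stripMargin)
    spark.sql("load data local inpath '/root/student.txt' overwrite into table student")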