文章实训一、二、建表语句

该文展示了两个使用Spark进行数据处理的实训例子。实训一涉及对Hive数据库中的student表进行查询,包括展示数据库和表,计算教师课程的平均分,并将结果保存到新表。实训二涵盖了学生、课程和分数的数据联合查询,如获取学生选课总数、总成绩,以及特定条件下的学生信息,并计算平均成绩及排名。
摘要由CSDN通过智能技术生成

实训一

import org.apache.log4j
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.sql.functions.col

object Spark_sx1 {
  Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName(this.getClass.getSimpleName)
      .master("local[*]")
      .config("dfs.client.use.datanode.hostname","true")
      .enableHiveSupport()
      .getOrCreate()
    //指定数据库
    val sq = "show databases"
    spark.sql(sq).show()
    val sql = "use student"
    spark.sql(sql)
    val sql2 = "show tables"
    spark.sql(sql2).show()

    val score = spark.sql("select * from student.score;")
    val course = spark.sql("select * from student.course;")
    val student = spark.sql("select * from student.student;")
    val teacher = spark.sql("select * from student.teacher;")

//    val stu1 = spark.sql("select teacher.t_id,course.c_id,avg(score.s_score) from teacher,course,score where teacher.t_id=course.t_id and course.c_id=score.c_id").show()

    val stu2 = score.join(course,"c_id").join(teacher,"t_id")
    val stu3 = stu2.select("t_id","c_id","s_score")

    val stu4 = stu3.groupBy("t_id","c_id").avg("s_score")
    stu4.show()
    val stu5 = stu4.withColumn("avg",col( "avg(s_score)").cast("int")).drop("avg(s_score)")
      stu5.show()
    //将数据写进去
    stu5.write.format("hive").mode(SaveMode.Overwrite).saveAsTable("student.teacher_courseAge")
  }
}


实训二

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, desc}
import org.apache.spark.sql.types.DataTypes
object Spark_sxxxx2 {
  Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
  def main(args: Array[String]): Unit = {
    val spark =SparkSession.builder()
      .appName(this.getClass.getSimpleName)
      .master("local[*]")
      .config("dfs.client.use.datanode.hostname", "true")
      .enableHiveSupport()
      .getOrCreate()
    val sc =spark.sparkContext
    sc.setLogLevel("ERROR")
    //  (2)  读数据
    val score = spark.sql("select * from school.score")
    val course = spark.sql("select * from school.course")
    val student = spark.sql("select * from school.student")
    //(3)学生学号、姓名、选课总数、所有课程的总成绩
    val s1 =student.select("s_id","s_name").join(score,"s_id")
    s1.show()
    //学生学号、姓名、选课总数
    val s2=s1.groupBy("s_id").count().orderBy("s_id")
    //    s2.show()
    //学生学号、所有课程总成绩
    val s3=s1.groupBy("s_id").sum("s_score").orderBy("s_id")
    //    s3.show()
    val s4 = s1.select("s_id", "s_name").join(s2, "s_id").join(s3, "s_id").distinct()
    val s5 = s4.withColumnRenamed("sum(s_score)", "sums").orderBy("s_id")
    s5.show()
    //(4)课程名称“数学”,且分数小于60的学生的姓名,分数
    val s6 = s1.join(course, "c_id")
    //    s6.show()
    val s7 = s6.where("c_name='数学' and s_score < 60").select("s_name","s_score")
    s7.show()
    //(5)学生的平均成绩及名次
    val s8 = s1.groupBy("s_id").avg("s_score").orderBy(desc("avg(s_score)"))
    //    val s9 = s8.withColumn("avg", col("avg(s_score)").cast("int")).drop("avg(s_score)")
    val s9 = s8.withColumn("avg", col("avg(s_score)").cast(DataTypes.IntegerType)).drop("avg(s_score)")
    //    s9.show()
    val s10 = s9.selectExpr("s_id", "avg", "rank() over (order by avg desc) as order")
    s10.show()
  }
}

建表语句

create table student(
s_id double,
s_name string,
s_brith string,
s_sex string
)
row format delimited fields terminated by ',';
集群模式:load data inpath '/data/student.txt'overwrite into table student;
本地模式:load data local inpath '/root/student.txt'overwrite into table student;

create table score(
s_id double,
c_id double,
s_score double
)
row format delimited fields terminated by ',';
集群模式:load data inpath '/data/score.txt'overwrite into table score;
本地模式:load data local inpath '/root/score.txt'overwrite into table score;

create table course(
c_id double,
c_name string,
t_id double
)
row format delimited fields terminated by ',';
集群模式:load data inpath '/data/course.txt'overwrite into table course;
本地模式:load data local inpath '/root/course.txt'overwrite into table course;

create table teacher(
t_id double,
t_name string
)
row format delimited fields terminated by ',';
集群模式:load data inpath '/data/teacher.txt'overwrite into table teacher;
本地模式:load data local inpath '/root/teacher.txt'overwrite into table teacher;

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值