Spark SQL

Stage 1: Convert an RDD to a DataFrame to read a text-file data source
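
The program below assumes each input file is a plain comma-separated text file with one record per line, in the same field order as the corresponding schema (this is inferred from how each line is split; the actual data files are not shown here):

    data/student.txt  ->  Sno,Sname,Ssex,Sbirthday,SClass
    data/teacher.txt  ->  Tno,Tname,Tsex,Tyear,Prof,Depart
    data/course.txt   ->  Cno,Cname,Tno
    data/score.txt    ->  Sno,Cno,Degree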

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types._
import scala.collection.mutable

object sparkSQL01 {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local")
      .appName("test")
      .config("spark.sql.shuffle.partitions", "5")
      .getOrCreate()
    /** ************************ student table schema *****************************/
    val studentRDD = spark.sparkContext.textFile("data/student.txt")
    val StudentSchema: StructType = StructType(mutable.ArraySeq( // student table
      StructField("Sno", StringType, nullable = false), // student number
      StructField("Sname", StringType, nullable = false), // student name
      StructField("Ssex", StringType, nullable = false), // student gender
      StructField("Sbirthday", StringType, nullable = true), // student date of birth
      StructField("SClass", StringType, nullable = true) // student's class
    ))
    val studentData = studentRDD.map(_.split(",")).map(attributes => Row(attributes(0),attributes(1),attributes(2),attributes(3),attributes(4)))
    val studentDF = spark.createDataFrame(studentData,StudentSchema)
    studentDF.createOrReplaceTempView("student")
    /** ************************ teacher table schema *****************************/
    val teacherRDD = spark.sparkContext.textFile("data/teacher.txt")
    val TeacherSchema: StructType = StructType(mutable.ArraySeq( // teacher table
      StructField("Tno", StringType, nullable = false), // teacher number (primary key)
      StructField("Tname", StringType, nullable = false), // teacher name
      StructField("Tsex", StringType, nullable = false), // teacher gender
      StructField("Tyear", IntegerType, nullable = true), // teacher's year of birth
      StructField("Prof", StringType, nullable = true), // professional title
      StructField("Depart", StringType, nullable = false) // teacher's department
    ))
    val teacherData = teacherRDD.map(_.split(",")).map(attributes => Row(attributes(0),attributes(1),attributes(2),attributes(3).toInt,attributes(4),attributes(5)))
    val teacherDF = spark.createDataFrame(teacherData,TeacherSchema)
    teacherDF.createOrReplaceTempView("teacher")
    /** ************************ course table schema *****************************/
    val courseRDD = spark.sparkContext.textFile("data/course.txt")
    val CourseSchema: StructType = StructType(mutable.ArraySeq( // course table
      StructField("Cno", StringType, nullable = false), // course number
      StructField("Cname", StringType, nullable = false), // course name
      StructField("Tno", StringType, nullable = false) // teacher number
    ))
    val courseData = courseRDD.map(_.split(",")).map(attributes => Row(attributes(0),attributes(1),attributes(2)))
    val courseDF = spark.createDataFrame(courseData,CourseSchema)
    courseDF.createOrReplaceTempView("course")
    /** ************************ score table schema *****************************/
    val scoreRDD = spark.sparkContext.textFile("data/score.txt")
    val ScoreSchema: StructType = StructType(mutable.ArraySeq( // score table
      StructField("Sno", StringType, nullable = false), // student number (foreign key)
      StructField("Cno", StringType, nullable = false), // course number (foreign key)
      StructField("Degree", IntegerType, nullable = true) // score
    ))
    val scoreData = scoreRDD.map(_.split(",")).map(attributes => Row(attributes(0),attributes(1),attributes(2).toInt))
    val scoreDF = spark.createDataFrame(scoreData,ScoreSchema)
    scoreDF.createOrReplaceTempView("score")
    /** ************************ queries over the tables *****************************/
    // Display all student information, ordered by class
    spark.sql("SELECT * FROM student ORDER BY SClass").show()
    // Query the Tname and Prof of teachers in the computer and electronic engineering departments
    // whose professional titles differ (i.e. exclude any title held in both departments)
    spark.sql("SELECT tname, prof " +
      "FROM Teacher " +
      "WHERE prof NOT IN (SELECT a.prof " +
      "FROM (SELECT prof " +
      "FROM Teacher " +
      "WHERE depart = 'department of computer' " +
      ") a " +
      "JOIN (SELECT prof " +
      "FROM Teacher " +
      "WHERE depart = 'department of electronic engineering' " +
      ") b ON a.prof = b.prof) ").orderBy("tname").show(false)
    // Display information for female teachers
    teacherDF.filter("Tsex = 'female'").show(false)
    // Display the distinct teacher departments
    teacherDF.select("Depart").distinct().show(false)
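    // Revisiting the Tname/Prof query above: a DataFrame-API sketch of the same NOT IN logic
    // (an alternative formulation, not part of the original solution; the intermediate names
    // compProfs, eeProfs and sharedProfs are illustrative). Collect the titles held in each
    // department, intersect them to get the titles common to both, then anti-join to keep only
    // teachers whose title is not shared.
    val compProfs = teacherDF.filter("Depart = 'department of computer'").select("Prof")
    val eeProfs = teacherDF.filter("Depart = 'department of electronic engineering'").select("Prof")
    val sharedProfs = compProfs.intersect(eeProfs) // titles that appear in both departments
    teacherDF.join(sharedProfs, Seq("Prof"), "left_anti") // drop teachers whose title is shared
      .select("Tname", "Prof").orderBy("Tname").show(false)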
    val maxsc = scoreDF.agg("Degree" -> "max") // highest score over all records
    maxsc.show()
    val meansc = scoreDF.groupBy("Cno").agg("Degree" -> "mean").orderBy("Cno") // mean score per course
    meansc.show()
    // meansc.write.format("json").save("mean.json")
  }
}
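
For reference, the same RDD-to-DataFrame conversion can also be done with the reflection-based approach, where the schema is inferred from a case class instead of being built by hand with StructType and Row. The sketch below is an illustration, not part of the original solution (the object and case-class names are made up), and it covers only the student table under the same assumption of a comma-separated data/student.txt:

import org.apache.spark.sql.SparkSession

object sparkSQL01Reflection {
  // Case class whose fields mirror the student schema defined above
  case class Student(Sno: String, Sname: String, Ssex: String, Sbirthday: String, SClass: String)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").appName("test").getOrCreate()
    import spark.implicits._ // enables rdd.toDF() through encoders derived from the case class

    val studentDF = spark.sparkContext.textFile("data/student.txt")
      .map(_.split(","))
      .map(a => Student(a(0), a(1), a(2), a(3), a(4)))
      .toDF() // column names and types come from the case class fields

    studentDF.createOrReplaceTempView("student")
    spark.sql("SELECT * FROM student ORDER BY SClass").show()
    spark.stop()
  }
}

With this approach the StructType/Row plumbing disappears, at the cost of fixing the schema at compile time in the case class.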
