spark从入门到放弃三十三:Spark Sql(6)hive sql 案例查询分数大于80分的同学

最新推荐文章于 2024-07-30 09:00:41 发布

WQ同学

最新推荐文章于 2024-07-30 09:00:41 发布

阅读量3.9k

点赞数

分类专栏： hive spark 大数据

本文链接：https://blog.csdn.net/u012957549/article/details/80032440

版权

spark 同时被 3 个专栏收录

122 篇文章 15 订阅

订阅专栏

大数据

43 篇文章 1 订阅

订阅专栏

hive

17 篇文章 0 订阅

订阅专栏

文章地址：http://www.haha174.top/article/details/258176
有两张表：一张是 student_info，保存的是学生的姓名和年龄；另一张是 student_scores，保存的是学生的姓名和分数。现在将分数不低于80分（示例代码中的条件为 score>=80）的学生的姓名、分数、年龄写入到一张新表 good_student_info。
需要安装一个hive 可以参考：http://www.haha174.top/article/details/253250
下面给出java 示例

/**
 * Spark SQL + Hive example.
 *
 * Rebuilds the Hive tables {@code student_info (name, age)} and
 * {@code student_scores (name, score)} from local text files, joins them on
 * {@code name}, keeps the rows whose score is at least 80, saves the result as
 * the Hive table {@code good_student_info}, then reads that table back and
 * prints every row.
 */
public class HiveDataSource {
    /**
     * Entry point of the example job.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("HiveDataSource");
        // try-with-resources closes the context even when one of the SQL statements fails.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // HiveContext takes the underlying SparkContext, not the JavaSparkContext wrapper.
            HiveContext sqlContext = new HiveContext(sc.sc());

            // Step 1: run HiveQL through HiveContext.sql() to (re)create and load the source tables.
            sqlContext.sql("DROP TABLE IF EXISTS student_info");
            sqlContext.sql("CREATE  TABLE IF NOT EXISTS student_info (name STRING ,age INT)");
            System.out.println("============================create table success");
            // Load the students' basic information into student_info.
            sqlContext.sql("LOAD DATA LOCAL INPATH '/data/hive/student_info/student_info.txt' INTO TABLE  student_info");

            sqlContext.sql("DROP TABLE IF EXISTS student_scores");
            sqlContext.sql("CREATE  TABLE IF NOT EXISTS student_scores (name STRING ,score INT)");
            // Load the students' scores into student_scores.
            sqlContext.sql("LOAD DATA LOCAL INPATH '/data/hive/student_info/student_scores.txt' INTO TABLE  student_scores");

            // Step 2: the result of sql() is itself a Dataset that can be used further.
            // Join the two tables on name and keep students whose score is >= 80.
            Dataset<Row> goodStudentDS = sqlContext.sql("SELECT ss.name ,s1.age,ss.score from student_info s1 JOIN  student_scores ss ON s1.name=ss.name WHERE   ss.score>=80");

            // Step 3: persist the query result as the Hive table good_student_info.
            // The old table is dropped first so that saveAsTable does not hit an existing table.
            sqlContext.sql("DROP TABLE IF EXISTS good_student_info");
            System.out.println("create table success");
            goodStudentDS.write().saveAsTable("good_student_info");

            // Step 4: build a Dataset directly from the saved table.
            // table(name) reads the table itself; tables(db) would instead list the tables of a database.
            Dataset<Row> goodStudentDSRows = sqlContext.table("good_student_info");
            // collectAsList() is used because, from Java, collect() is typed as Object and
            // cannot be assigned to Row[] without a cast.
            for (Row goodStudentRow : goodStudentDSRows.collectAsList()) {
                System.out.println(goodStudentRow);
            }
        }
    }
}

下面给出scala 示例：

/**
 * Spark SQL + Hive example.
 *
 * Rebuilds the Hive tables `student_info (name, age)` and
 * `student_scores (name, score)` from local text files, joins them on `name`,
 * keeps the rows whose score is at least 80, saves the result as the Hive
 * table `good_student_info`, then reads that table back and prints every row.
 */
object HiveDataSource {
  /**
   * Entry point of the example job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("HiveDataSource")
    val sc = new SparkContext(conf)
    // try/finally stops the context even when one of the SQL statements fails.
    try {
      // HiveContext takes a SparkContext (not a JavaSparkContext).
      val sqlContext = new HiveContext(sc)

      // Step 1: run HiveQL through HiveContext.sql() to (re)create and load the source tables.
      sqlContext.sql("DROP TABLE IF EXISTS student_info")
      sqlContext.sql("CREATE  TABLE IF NOT EXISTS student_info (name STRING ,age INT)")
      println("============================create table success")
      // Load the students' basic information into student_info.
      sqlContext.sql("LOAD DATA LOCAL INPATH '/data/hive/student_info/student_info.txt' INTO TABLE  student_info")

      sqlContext.sql("DROP TABLE IF EXISTS student_scores")
      sqlContext.sql("CREATE  TABLE IF NOT EXISTS student_scores (name STRING ,score INT)")
      // Load the students' scores into student_scores.
      sqlContext.sql("LOAD DATA LOCAL INPATH '/data/hive/student_info/student_scores.txt' INTO TABLE  student_scores")

      // Step 2: the result of sql() is itself a DataFrame that can be used further.
      // Join the two tables on name and keep students whose score is >= 80.
      val goodStudentDS = sqlContext.sql("SELECT ss.name ,s1.age,ss.score from student_info s1 JOIN  student_scores ss ON s1.name=ss.name WHERE   ss.score>=80")

      // Step 3: persist the query result as the Hive table good_student_info.
      // The old table is dropped first so that saveAsTable does not hit an existing table.
      sqlContext.sql("DROP TABLE IF EXISTS good_student_info")
      println("create table success")
      goodStudentDS.write.saveAsTable("good_student_info")

      // Step 4: build a DataFrame directly from the saved table.
      // table(name) reads the table itself; tables(db) would instead list the tables of a database.
      val goodStudentRows = sqlContext.table("good_student_info").collect()
      goodStudentRows.foreach(row => println(row))
    } finally {
      sc.stop()
    }
  }
}