Writing data from Spark into a Hive partitioned table


Approach

RDD => Dataset => create the partitioned Hive table => register the Dataset as a temp view => insert into the partition

Here is the code:

import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Dataset, SparkSession}


object Test01 {
  def main(args: Array[String]): Unit = {

    // Build a SparkSession with Hive support so Spark SQL can read and write Hive tables
    val conf = new SparkConf().setMaster("local[*]").setAppName("SparkSqlOnHive")
    val sparkSession = SparkSession
      .builder()
      .config(conf)
      .config("spark.sql.warehouse.dir", "hdfs://192.168.200.105:9000/user/hive/warehouse")
      .enableHiveSupport()
      .getOrCreate()
    import sparkSession.implicits._

    // Build an RDD, map it into the case class, and convert it to a Dataset
    val dataRDD: RDD[(Int, String)] = sparkSession.sparkContext.makeRDD(List((1, "张三"), (2, "李四")))
    val studentDS: Dataset[student] = dataRDD.map {
      case (id, name) => student(id, name)
    }.toDS()
    // Register the Dataset as a temporary view so it can be referenced from SQL
    studentDS.createOrReplaceTempView("student2021")

	sparkSession.sql("create database SkillsCompetitions")
    
    sparkSession.sql("use SkillsCompetitions")

    // Create the Hive table, partitioned by year and stored as tab-delimited text
    sparkSession.sql(
      """
        |create table if not exists student (id int, name string)
        |partitioned by (year string)
        |row format delimited fields terminated by "\t"
        |""".stripMargin)

    // Insert the temp view's rows into the static partition year=2021
    sparkSession.sql(
      """
        |insert into table student partition (year="2021")
        |select id, name from student2021
        |""".stripMargin)

    sparkSession.close()
  }
  // Defined at object level (not inside main) so that sparkSession.implicits can derive an Encoder for it
  case class student(id: Int, name: String)

}
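To sanity-check the result, a couple of queries placed just before sparkSession.close() will list the new partition and read it back (a minimal sketch, using the table and view created above):

    // List the table's partitions; a single year=2021 entry is expected
    sparkSession.sql("show partitions student").show()

    // Read back only the 2021 partition
    sparkSession.sql("select id, name from student where year = '2021'").show()

If the partition value comes from the data rather than being hard-coded, Hive's dynamic partitioning can be used instead. This is a sketch under the assumption that the source view also carries a year column; hive.exec.dynamic.partition and hive.exec.dynamic.partition.mode are standard Hive settings:

    // Allow fully dynamic partition inserts
    sparkSession.sql("set hive.exec.dynamic.partition=true")
    sparkSession.sql("set hive.exec.dynamic.partition.mode=nonstrict")

    // The partition column must come last in the select list
    sparkSession.sql(
      """
        |insert into table student partition (year)
        |select id, name, year from student2021
        |""".stripMargin)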
