黑猴子的家:Spark SQL 写入数据到 Hive

1、Constants 常量

/**
  * Table-name constants shared by the mock-data job.
  *
  * Only [[Constants.TABLE_USER_INFO]] is referenced by the code visible in this
  * file (as the target table of the mock user data); the other two names are
  * presumably used by generators that are not shown here.
  */
object Constants {

  /** Name of the table that receives the mock user records. */
  val TABLE_USER_INFO: String = "user_info"

  /** Name of the product table. */
  val TABLE_PRODUCT_INFO: String = "product_info"

  /** Name of the user visit action table. */
  val TABLE_USER_VISIT_ACTION: String = "user_visit_action"

}

2、DataModel

/**
  * One record of mock user data.
  *
  * Intended number of records: 100.
  *
  * The value ranges listed below are the ranges intended for the mock data.
  * The class itself performs no validation, so nothing here enforces them.
  * Note that `age` and `city` hold numeric values but are carried as strings.
  *
  * @param user_id      user ID, intended range 1 - 100
  * @param username     login name, built as "user" + id
  * @param name         real name, built as "name" + id
  * @param age          age as a decimal string, intended range 1 - 60
  * @param professional occupation, built as "profess" + a number in 1 - 100
  * @param city         city identifier as a decimal string, intended range 1 - 10
  * @param sex          gender, either "male" or "female"
  */
case class UserInfo(
    user_id: Int,
    username: String,
    name: String,
    age: String,
    professional: String,
    city: String,
    sex: String
)

3、MockDataWareHouse

import java.util.UUID

import org.apache.commons.lang3.time.DateFormatUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.mutable.ArrayBuffer
import scala.util.Random

/**
  * Generates mock user data and stores it as a Hive table through Spark SQL.
  *
  * Running [[MockDataWareHouse.main]] starts a local Spark session with Hive
  * support, builds the mock users, converts them to a DataFrame and saves them
  * under the table name `Constants.TABLE_USER_INFO`.
  */
object MockDataWareHouse {

  /** Number of mock users to generate; ids run from 1 up to and including this value. */
  private val UserCount = 100

  /**
    * Builds the mock user records.
    *
    * Exactly 100 users are produced, with these value ranges:
    *
    *  - user_id:      sequential, 1 - 100
    *  - username:     "user" + user_id
    *  - name:         "name" + user_id
    *  - age:          random, 1 - 60 (stored as a string)
    *  - professional: "profess" + a random number in 1 - 100
    *  - city:         random, 1 - 10 (stored as a string)
    *  - sex:          randomly "male" or "female"
    *
    * The random generator is unseeded, so every call yields different values
    * for the random fields.
    *
    * @return the generated users, ordered by ascending user_id
    */
  def userInfoGenerate(): Array[UserInfo] = {
    val sexes = Array("male", "female")
    val random = new Random()

    // `1 to UserCount` is inclusive on both ends: ids 1..100, exactly 100 records.
    (1 to UserCount).map { userId =>
      // nextInt(n) returns a value in [0, n); the +1 shifts it to the 1-based
      // ranges listed above.
      val age = random.nextInt(60) + 1
      val professional = "profess" + (random.nextInt(100) + 1)
      val city = random.nextInt(10) + 1
      val sex = sexes(random.nextInt(sexes.length))

      UserInfo(userId, "user" + userId, "name" + userId, age.toString, professional, city.toString, sex)
    }.toArray
  }

  /**
    * Replaces the given table with the content of `data`.
    *
    * The table is dropped first (if it exists) and then re-created by
    * `saveAsTable`. The two steps are separate statements, so if the write
    * fails the previous table content is already gone.
    *
    * NOTE(review): `table` is concatenated into the SQL text without escaping;
    * only pass trusted table names (here it is a constant).
    *
    * @param spark session used to run the DROP statement
    * @param table name of the table to replace
    * @param data  rows to store
    */
  def saveInDataWarehouse(spark: SparkSession, table: String, data: DataFrame): Unit = {
    spark.sql("DROP TABLE IF EXISTS " + table)
    data.write.saveAsTable(table)
  }

  /**
    * Entry point: generates the mock users and saves them to the user info table.
    *
    * The Spark master is hard-coded to `local[*]`.
    *
    * @param args command line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    // Spark configuration for a local run.
    val sparkConf = new SparkConf().setAppName("mock").setMaster("local[*]")

    // Session with Hive support so that saveAsTable targets the Hive warehouse.
    val spark = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()

    try {
      // Create the mock data.
      val userInfoData = userInfoGenerate()

      // Convert the mock data to an RDD and then to a DataFrame.
      import spark.implicits._
      val userInfoDF = spark.sparkContext.makeRDD(userInfoData).toDF()

      // Save the data to Hive.
      saveInDataWarehouse(spark, Constants.TABLE_USER_INFO, userInfoDF)
    } finally {
      // Always release the session, even when generation or the write fails.
      spark.stop()
    }
  }
}

4、pom.xml

<!-- Spark modules used by the mock-data job. The "_2.11" suffix of each artifactId
     is the Scala binary version the artifact was built for. No <version> is
     declared here, so the versions presumably come from a parent POM or a
     <dependencyManagement> section outside this snippet (TODO confirm). -->
<dependencies>

    <!-- Spark core: provides SparkConf and the SparkContext / RDD API. -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
    </dependency>

    <!-- Hive integration: required for SparkSession.builder().enableHiveSupport(). -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-hive_2.11</artifactId>
    </dependency>

    <!-- Spark SQL: provides SparkSession and DataFrame. -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
    </dependency>

</dependencies>

<!-- Build section: registers the Scala compiler plugin for Maven. -->
<build>
    <plugins>
        <!-- scala-maven-plugin compiles the Scala sources. No <version> or
             <executions> are declared here; presumably they are inherited from
             a <pluginManagement> section in a parent POM (TODO confirm). -->
        <plugin>
            <groupId>net.alchim31.maven</groupId>
            <artifactId>scala-maven-plugin</artifactId>
        </plugin>
    </plugins>
</build>
  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值