1、Constants 常量
/**
 * Table-name constants shared across the project.
 */
object Constants {

  /** Name of the user-info table. */
  val TABLE_USER_INFO: String = "user_info"

  /** Name of the product-info table. */
  val TABLE_PRODUCT_INFO: String = "product_info"

  /** Name of the user-visit-action table. */
  val TABLE_USER_VISIT_ACTION: String = "user_visit_action"
}
2、DataModel
/**
 * One row of user profile data.
 *
 * The ranges below describe the intended mock data set (100 users); the class
 * itself performs no validation of its fields.
 *
 * @param user_id      user ID, intended range [1, 100]
 * @param username     login name, intended form "user" + id
 * @param name         real name, intended form "name" + id
 * @param age          age stored as text, intended range [1, 60]
 * @param professional occupation, intended form "profess" + a number in [1, 100]
 * @param city         city code stored as text, intended range [1, 10]
 * @param sex          gender, either "male" or "female"
 */
case class UserInfo(
  user_id: Int,
  username: String,
  name: String,
  age: String,
  professional: String,
  city: String,
  sex: String
)
3、MockDataWareHouse
import java.util.UUID
import org.apache.commons.lang3.time.DateFormatUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.mutable.ArrayBuffer
import scala.util.Random
/**
 * Generates mock user records and stores them in a Hive table through Spark SQL.
 */
object MockDataWareHouse {

  /**
   * Builds the mock user records.
   *
   * Each generated user has:
   *  - `user_id`      sequential ID in [1, count]
   *  - `username`     "user" + id
   *  - `name`         "name" + id
   *  - `age`          random value in [1, 60], stored as text
   *  - `professional` "profess" + a random number in [1, 100]
   *  - `city`         random value in [1, 10], stored as text
   *  - `sex`          "male" or "female", chosen at random
   *
   * @param count number of users to generate; defaults to 100
   * @return an array of exactly `count` users, ordered by ascending `user_id`
   * @throws IllegalArgumentException if `count` is negative
   */
  def userInfoGenerate(count: Int = 100): Array[UserInfo] = {
    require(count >= 0, s"count must be non-negative, got $count")

    val sexes = Array("male", "female")
    // Utility used to draw the random field values.
    val random = new Random()

    // `1 to count` is inclusive on both ends, giving exactly `count` ids starting at 1.
    (1 to count).map { userId =>
      // nextInt(n) yields a value in [0, n), so shift by one to obtain 1-based ranges.
      val age = random.nextInt(60) + 1
      val professional = "profess" + (random.nextInt(100) + 1)
      val city = random.nextInt(10) + 1
      val sex = sexes(random.nextInt(sexes.length))
      UserInfo(userId, "user" + userId, "name" + userId, age.toString, professional, city.toString, sex)
    }.toArray
  }

  /**
   * Replaces the given table with the contents of `data`.
   *
   * @param spark active session used to run the DROP statement
   * @param table name of the table to (re)create
   * @param data  rows to store in the table
   */
  def saveInDataWarehouse(spark: SparkSession, table: String, data: DataFrame): Unit = {
    // Drop any previous copy first so that saveAsTable always starts from a clean slate.
    spark.sql("DROP TABLE IF EXISTS " + table)
    data.write.saveAsTable(table)
  }

  /**
   * Entry point: generates the mock users and saves them to the user-info table.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Build the Spark configuration and a Hive-enabled session.
    val sparkConf = new SparkConf().setAppName("mock").setMaster("local[*]")
    val spark = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()

    try {
      // Create the mock data.
      val userInfoData = userInfoGenerate()

      // Convert the mock data to an RDD and then to a DataFrame.
      import spark.implicits._
      val userInfoDF = spark.sparkContext.makeRDD(userInfoData).toDF

      // Save the data to Hive.
      saveInDataWarehouse(spark, Constants.TABLE_USER_INFO, userInfoDF)
    } finally {
      // Always release the session, even when generation or the save fails.
      spark.stop()
    }
  }
}
4、pom.xml
<!-- Spark modules required by the mock-data job: core, Hive support and Spark SQL. -->
<!-- The _2.11 suffix on each artifactId is the Scala binary version the artifact was built for. -->
<!-- NOTE(review): no version element is declared for any dependency; presumably the versions are inherited from a parent POM (dependencyManagement). Confirm before building this module standalone. -->
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.11</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
</dependency>
</dependencies>
<!-- Build section: declares the scala-maven-plugin for this module. -->
<!-- NOTE(review): no version or executions are declared here; presumably they come from a parent POM (pluginManagement). Confirm that Scala sources are actually compiled in this module. -->
<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
</plugin>
</plugins>
</build>