// Study notes — Big Data Technology & Application provincial competition
// Spark on YARN
package pra.test
import org.apache.spark.sql.SparkSession
/**
 * Reads table T_appr_area from MySQL over JDBC and writes it into the STATIC
 * Hive partition `appr_date=<date>` of `ods.ods_area_part`, overwriting only
 * that partition (incremental, one-day extraction).
 *
 * Usage: first program argument (optional) is the partition date, e.g.
 * "2022-09-26"; it defaults to that date when absent, preserving the
 * original behavior.
 *
 * @author 逍遥哥哥每天都要努力啊
 * @date 2022/9/26 9:27
 * @version 1.0
 */
object mysqlRead {
  def main(args: Array[String]): Unit = {
    // Requires a running Hive metastore: hive --service metastore &
    // For a local run, identify to HDFS as root:
    // System.setProperty("HADOOP_USER_NAME", "root")
    val spark = SparkSession
      .builder()
      .master("yarn")
      // .master("local[*]")
      .appName("readMysql")
      // .config("spark.sql.warehouse.dir", "hdfs://hadoop01:50070/user/hive/warehouse")
      // .config("hive.metastore.uris", "thrift://hadoop01:9083")
      .enableHiveSupport()
      .getOrCreate()

    // Generalized: partition date comes from args so the job can be re-run
    // for any day; default keeps the original hard-coded date.
    val partitionDate = args.headOption.getOrElse("2022-09-26")

    val mysqlData = spark.read
      .format("jdbc")
      // MySQL Connector/J 8.x driver class; com.mysql.jdbc.Driver is the
      // deprecated 5.x name (the original comment already noted "8.0").
      .option("driver", "com.mysql.cj.jdbc.Driver")
      // Connector/J 8 needs an explicit serverTimezone; SSL disabled on LAN.
      // (These parameters were called out in the original comment but missing
      // from the URL.)
      .option("url", "jdbc:mysql://192.168.202.134:3306/ContractionDB?useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true")
      // Database credentials
      .option("user", "root")
      .option("password", "123456")
      // Source table to read
      .option("dbtable", "T_appr_area")
      .load()
    mysqlData.show(false)
    mysqlData.createOrReplaceTempView("mysql")

    // Target table DDL (for reference):
    // create table ods_area_part(
    //   proj_id int, proj_name string, appro_date string, appro_unit string
    // )
    // partitioned by (appr_date string)
    // row format delimited fields terminated by ",";

    // Static-partition incremental load: insert overwrite replaces only the
    // selected partition.
    // NOTE(review): the filter uses column `appr_date` while the select list
    // uses `appro_date` — confirm the source table really has both columns.
    spark.sql(
      s"""
         |insert overwrite table ods.ods_area_part
         |partition(appr_date="$partitionDate")
         |select proj_id,proj_name,appro_date,appro_unit
         |from mysql
         |where appr_date="$partitionDate"
       """.stripMargin)
    spark.close()
  }
}
package pra.test
import org.apache.spark.sql.SparkSession
/**
 * Reads table T_plot_info from MySQL over JDBC and appends it into the Hive
 * table `ods.ods_plot_info` using DYNAMIC partitioning on column `rdate`
 * (full extraction; partitions are derived from the data itself).
 *
 * @author 逍遥哥哥每天都要努力啊
 * @date 2022/9/28 19:08
 * @version 1.0
 */
object mysqlRead2 {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("yarn")
      .appName("readMysql")
      .enableHiveSupport()
      .getOrCreate()

    val mysqlData = spark.read
      .format("jdbc")
      // MySQL Connector/J 8.x driver class; com.mysql.jdbc.Driver is the
      // deprecated 5.x name.
      .option("driver", "com.mysql.cj.jdbc.Driver")
      // Connector/J 8 needs an explicit serverTimezone; SSL disabled on LAN.
      .option("url", "jdbc:mysql://192.168.202.134:3306/ContractionDB?useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true")
      .option("user", "root")
      .option("password", "123456")
      // Source table to read
      .option("dbtable", "T_plot_info")
      .load()
    mysqlData.createOrReplaceTempView("mysql")

    // Target table DDL (for reference):
    // create table if not exists ods_plot_info(
    //   plot_id string, proj_id int, total_area float,
    //   plot_name string, purpose string, shape_type string
    // )
    // partitioned by (rdate string)
    // row format delimited fields terminated by ",";

    // Enable dynamic partitioning in non-strict mode so the partition value
    // can come from the selected `rdate` column instead of a static literal.
    spark.sql("set hive.exec.dynamic.partition=true")
    spark.sql("set hive.exec.dynamic.partition.mode=nonstrict")

    // Dynamic-partition full extraction: the trailing select column (rdate)
    // feeds the partition spec.
    spark.sql(
      """
        |insert into table ods.ods_plot_info
        |partition(rdate)
        |select plot_id,proj_id,total_area,plot_name,purpose,shape_type,rdate
        |from mysql
      """.stripMargin)
    spark.close()
  }
}