package hx.com
import hx.com.UDF.{JudgeLicense, SqlDataSource, UDF, UuidUdf}
import hx.com.constant.PropConstants
import hx.com.util.PropertieUtil
import org.apache.hadoop.security.UserGroupInformation
import org.apache.log4j.Logger
import org.apache.spark.sql.SparkSession
import java.io.File
import java.util.Properties
import scala.io.{BufferedSource, Source}
/**
* 此版本是最新版,并已经部署在集群上(/home/etl_admin/spark)
* ods层数据清洗落地到dwd层
* 有特定UDF自定义函数的代码:也可以跑通用sql
*/
object Ods2DwdFilterUDF {
def main(args: Array[String]): Unit = {
val timeBegin: Long = System.currentTimeMillis()
val log: Logger = Logger.getRootLogger
val filePath: String = args(0)
//读取集群配置文件
val prop: Properties = PropertieUtil.load("config.properties")
//本地测试读文件
// val prop: Properties = PropertieUtil.getProperties("/config.properties")
//读hive 的Kerberos认证
System.setProperty("java.security.krb5.conf", prop.getProperty(PropConstants.KRB5_CONF_PATH))
System.setProperty("HADOOP_USER_NAME", prop.getProperty(PropConstants.HADOOP_USER_NAME))
System.setProperty("user.name", prop.getProperty(PropConstants.USER_NAME))
UserGroupInformation.loginUserFromKeytab(
prop.getProperty(PropConstants.KEYTAB_NAME), prop.getProperty(PropConstants.KEYTAB_FILE_PATH)
)
System.out.println(UserGroupInformation.getLoginUser)
val session: SparkSession = SparkSession.builder()//.master("local[2]").appName("SparkSeesionApp")
.config("spark.hadoop.hive.exec.dynamic.partition", "true")//开启动态分区
.config("spark.hadoop.hive.exec.dynamic.partition.mode", "nonstrict")//开启动态分区
.enableHiveSupport() //支持hive
.getOrCreate()
// session.sparkContext.setLogLevel("WARN")
import org.apache.spark.sql.functions.udf
//必须指定泛型[[Integer,String]],integer为自定义udf函数的出参类型,String为入参类型
//参一"judgeLicense"是udf在sql使用中的方法名,参二是udf类中的具体方法
session.udf.register[Integer,String]("judgeLicense",JudgeLicense.judgeLicense)
//增加 equals,rangeMap UDF函数