package com.citi.gft.enrichment.util;
import org.apache.commons.lang.SystemUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.springframework.core.io.ClassPathResource;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import static org.apache.spark.sql.functions.lit;
public class SparkUtil {

    // Kerberos principal and keytab path; kept redacted as "**" like the other secrets in this class.
    private static final String USER = "**";
    private static final String PATH = "**";

    /**
     * Builds a local SparkSession with Hive support. On Windows, Kerberos and
     * truststore settings are loaded from the classpath before logging in from a keytab.
     */
    public static SparkSession buildSession() throws IOException {
        SparkConf conf = new SparkConf()
                .set("spark.port.maxRetries", "100")
                .setAppName("**")
                .setMaster("local[*]");
        if (SystemUtils.IS_OS_WINDOWS) {
            String cp = new ClassPathResource("Kerberos/").getURL().getPath();
            Configuration hf = new Configuration();
            System.setProperty("javax.net.ssl.trustStorePassword", "**");
            System.setProperty("javax.security.auth.useSubjectCredsOnly", "false");
            System.setProperty("java.security.krb5.conf", cp + "krb5.conf");
            System.setProperty("javax.net.ssl.trustStore", cp + "truststore");
            hf.set("hadoop.security.authentication", "Kerberos");
            UserGroupInformation.setConfiguration(hf);
            UserGroupInformation.loginUserFromKeytab(USER, PATH);
        }
        return SparkSession.builder().config(conf).enableHiveSupport().getOrCreate();
    }

    /**
     * Aligns the source Dataset to the target Hive table's schema (filling missing
     * columns with nulls) and inserts it with dynamic partitioning enabled.
     */
    public static void writeToHive(SparkSession sparkSession, Dataset<Row> source, String hiveTable) {
        List<String> sourceColumnsList = Arrays.asList(source.columns());

        // Empty Dataset that carries only the destination table's schema.
        Dataset<Row> dest = sparkSession.sql("select * from " + hiveTable + " where 1 != 1");

        // Add any destination column missing from the source as a null literal
        // (assumes the source has no columns outside the destination schema).
        for (String destColumn : dest.columns()) {
            if (!sourceColumnsList.contains(destColumn)) {
                source = source.withColumn(destColumn, lit(null));
            }
        }
        source.show();

        // unionByName aligns the source columns to the destination schema's order.
        Dataset<Row> finalDataset = dest.unionByName(source);
        finalDataset.show();

        sparkSession.sql("set hive.exec.dynamic.partition=true");
        sparkSession.sql("set hive.exec.dynamic.partition.mode=nonstrict");
        sparkSession.sql("SET hive.support.quoted.identifiers=NONE");

        finalDataset.write().mode("overwrite").format("Hive").insertInto(hiveTable);
    }
}
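
A minimal usage sketch, assuming the Kerberos/truststore files are on the classpath and the Hive tables already exist; SparkUtilExample, db.source_table, and db.target_table are illustrative names, not part of the original class:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class SparkUtilExample {
    public static void main(String[] args) throws Exception {
        // Build the local, Hive-enabled session (performs the Kerberos login on Windows).
        SparkSession spark = SparkUtil.buildSession();

        // Any Dataset whose columns are a subset of the target table's columns;
        // the table names here are placeholders.
        Dataset<Row> source = spark.sql("select * from db.source_table");

        // Missing target columns are added as nulls before the insert.
        SparkUtil.writeToHive(spark, source, "db.target_table");

        spark.stop();
    }
}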