SparkUtil

A utility class for building a SparkSession with Hive support (performing an in-process Kerberos login when running on Windows) and for writing a Dataset into an existing Hive table whose schema may be wider than the source's.

package com.citi.gft.enrichment.util;

import org.apache.commons.lang.SystemUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.springframework.core.io.ClassPathResource;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import static org.apache.spark.sql.functions.lit;


public class SparkUtil {

    // Redacted in the original post: the Kerberos principal and keytab path.
    private static final String USER = "**";
    private static final String PATH = "**";

    public static SparkSession buildSession() throws IOException {
        SparkConf conf = new SparkConf()
                .set("spark.port.maxRetries", "100")
                .setAppName("**")
                .setMaster("local[*]");

        // On Windows (local development) authenticate against the cluster via
        // Kerberos, using the krb5.conf and truststore bundled on the classpath.
        if (SystemUtils.IS_OS_WINDOWS) {
            String cp = new ClassPathResource("Kerberos/").getURL().getPath();
            System.setProperty("javax.net.ssl.trustStorePassword", "**");
            System.setProperty("javax.security.auth.useSubjectCredsOnly", "false");
            System.setProperty("java.security.krb5.conf", cp + "krb5.conf");
            System.setProperty("javax.net.ssl.trustStore", cp + "truststore");

            Configuration hadoopConf = new Configuration();
            hadoopConf.set("hadoop.security.authentication", "Kerberos");
            UserGroupInformation.setConfiguration(hadoopConf);
            UserGroupInformation.loginUserFromKeytab(USER, PATH);
        }
        return SparkSession.builder().config(conf).enableHiveSupport().getOrCreate();
    }

    public static void writeToHive(SparkSession sparkSession, Dataset<Row> source, String hiveTable) {
        // Read the destination table's schema without pulling any rows.
        Dataset<Row> dest = sparkSession.sql("select * from " + hiveTable + " where 1 != 1");

        // Add any destination columns missing from the source as null columns
        // so the two schemas line up for unionByName.
        List<String> sourceColumns = Arrays.asList(source.columns());
        for (String destColumn : dest.columns()) {
            if (!sourceColumns.contains(destColumn)) {
                source = source.withColumn(destColumn, lit(null));
            }
        }
        source.show(); // debug output

        Dataset<Row> finalDataset = dest.unionByName(source);
        finalDataset.show(); // debug output
        sparkSession.sql("set hive.exec.dynamic.partition=true");
        sparkSession.sql("set hive.exec.dynamic.partition.mode=nonstrict");
        sparkSession.sql("SET hive.support.quoted.identifiers=NONE");
        finalDataset.write().mode("overwrite").format("Hive").insertInto(hiveTable);
    }
}
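For reference, a minimal sketch of how the two helpers fit together; the driver class, table names, and source query below are hypothetical:

import com.citi.gft.enrichment.util.SparkUtil;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class EnrichmentJob {

    public static void main(String[] args) throws Exception {
        SparkSession spark = SparkUtil.buildSession();

        // Hypothetical source: any query whose columns are a subset of the
        // destination table's columns; writeToHive pads the rest with nulls.
        Dataset<Row> source = spark.sql("select * from staging.trades");

        SparkUtil.writeToHive(spark, source, "warehouse.trades_enriched");
        spark.stop();
    }
}

Note that even after the null-column padding, unionByName requires the two column sets to match exactly, so the source must not carry any columns that the destination table lacks.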
