<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.3.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-auth</artifactId>
    <version>3.3.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>3.1.3</version> <!-- check whether a newer version is available -->
    <exclusions>
        <exclusion>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-runner</artifactId>
        </exclusion>
    </exclusions>
</dependency>
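A note on the versions: the Hadoop 3.3.4 and Hive 3.1.3 artifacts above are examples and should generally match what your cluster runs. The jetty-runner exclusion is commonly added so that the Jetty pulled in by hive-jdbc does not clash with the servlet container of the host application; whether you actually need it depends on your own dependency tree.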
A utility class for Kerberos login; the same authentication works for Hive, HDFS, and other Hadoop components.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.HadoopKerberosName;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;

public class KerberosUtil {

    private static final Logger logger = LoggerFactory.getLogger(KerberosUtil.class);

    public static boolean loginKerberos(
            org.apache.hadoop.conf.Configuration config,
            String keytabPrincipal,
            File keytabFile,
            File krb5Conf,
            File coreSiteConfig,
            File hdfsSiteConfig) {
        // Load the cluster configuration files.
        config.addResource(new Path(coreSiteConfig.getPath()));
        config.addResource(new Path(hdfsSiteConfig.getPath()));
        // Point the JVM at the Kerberos configuration and record the credentials.
        System.setProperty("java.security.krb5.conf", krb5Conf.getPath());
        config.set("kerberos.principal", keytabPrincipal);
        config.set("keytab.file", keytabFile.getPath());
        UserGroupInformation.setConfiguration(config);
        try {
            UserGroupInformation.loginUserFromKeytab(keytabPrincipal, keytabFile.getPath());
            logger.info("UserGroupInformation LoginUser: {}",
                    UserGroupInformation.getLoginUser().getUserName());
            return true;
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
        }
        return false;
    }

    /** Renew the TGT if it is close to expiring; call periodically from long-running jobs. */
    public static void reLoginKerberos() throws IOException {
        UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab();
    }

    /** Clear the cached login state so a different principal can log in. */
    public static void reset() {
        UserGroupInformation.setLoginUser(null);
        HadoopKerberosName.setRules(null);
    }
}
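A minimal usage sketch of the utility class, assuming the class above is on the classpath; every path and the principal are placeholders, not values from this article:

import org.apache.hadoop.conf.Configuration;
import java.io.File;
import java.io.IOException;

public class KerberosLoginDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        boolean ok = KerberosUtil.loginKerberos(
                conf,
                "user@EXAMPLE.COM",                          // placeholder principal
                new File("/etc/security/user.keytab"),       // placeholder keytab
                new File("/etc/krb5.conf"),
                new File("/etc/hadoop/conf/core-site.xml"),
                new File("/etc/hadoop/conf/hdfs-site.xml"));
        System.out.println("kerberos login ok: " + ok);
        if (ok) {
            // For long-running jobs, renew periodically so the TGT does not expire.
            KerberosUtil.reLoginKerberos();
        }
    }
}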
Logging into Hive without Kerberos authentication
public String createHiveTable(String[] fields, String dbUrl, DbSchema schema,
                              String tableName, String location, String delimiter) {
    try {
        // Load the Hive JDBC driver.
        Class.forName("org.apache.hive.jdbc.HiveDriver");
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        System.out.println("HiveDriver not found");
        return "failed";
    }
    // try-with-resources closes the statement and connection on every path;
    // plain close() calls at the end would be skipped on the early
    // "tableExist" return and on SQL failures.
    try (Connection connection = DriverManager.getConnection(dbUrl, "", "");
         Statement stmt = connection.createStatement()) {
        System.out.println("Connected to Hive");
        // Check whether the table already exists.
        String checkTableSql = "SHOW TABLES IN " + schema.getSchemaName()
                + " LIKE '" + tableName + "'";
        try (ResultSet resultSet = stmt.executeQuery(checkTableSql)) {
            if (resultSet.next()) {
                System.out.println(tableName + " already exists");
                return "tableExist";
            }
        }
        // Build the column list (every column is typed as string). Collecting
        // the non-empty names first and joining them avoids the dangling comma
        // a position-based loop produces when the last entry is empty.
        List<String> columns = new ArrayList<>();   // java.util.List / java.util.ArrayList
        for (String field : fields) {
            if (!StringUtils.isEmpty(field)) {
                columns.add(" `" + field + "` string");
            }
        }
        String locate = "";
        if (!StringUtils.isEmpty(location)) {
            locate = "LOCATION \n'" + location + "'";
        }
        // Create the table; adjust the DDL to the structure you need. Note the
        // newline after the SERDEPROPERTIES clause -- without it the statement
        // reads "...'')STORED AS..." and fails to parse.
        String createTableSql = "CREATE TABLE `" + schema.getSchemaName() + "`.`" + tableName + "`(\n"
                + String.join(", \n", columns) + ")\n"
                + "PARTITIONED BY ( \n"
                + "  `day` string) \n"
                + "ROW FORMAT SERDE \n"
                + "  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' \n"
                + "WITH SERDEPROPERTIES ( \n"
                + "  'field.delim'='" + delimiter + "', \n"
                + "  'serialization.format'='" + delimiter + "', \n"
                + "  'serialization.null.format'='') \n"
                + "STORED AS INPUTFORMAT \n"
                + "  'org.apache.hadoop.mapred.TextInputFormat' \n"
                + "OUTPUTFORMAT \n"
                + "  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\n"
                + locate;
        System.out.println(createTableSql);
        stmt.executeUpdate(createTableSql);
        System.out.println("Table created successfully");
        // Pre-create daily partitions from yesterday through seven days from today.
        DateTimeFormatter dayFormatter = DateTimeFormatter.ofPattern("yyyyMMdd");
        LocalDate startDate = LocalDate.now().minusDays(1);
        LocalDate endDate = LocalDate.now().plusDays(7);
        for (LocalDate date = startDate; !date.isAfter(endDate); date = date.plusDays(1)) {
            String dayString = date.format(dayFormatter);
            String createPartitionSql = "ALTER TABLE `" + schema.getSchemaName() + "`.`"
                    + tableName + "` ADD IF NOT EXISTS PARTITION(day='" + dayString + "')";
            stmt.executeUpdate(createPartitionSql);
        }
    } catch (SQLException e) {
        e.printStackTrace();
        return "failed";
    }
    return "success";
}
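To make the partition range concrete: the loop covers nine days in total, yesterday through seven days after today, both inclusive. A standalone sketch of the same date arithmetic:

import java.time.LocalDate;
import java.time.format.DateTimeFormatter;

public class PartitionDaysDemo {
    public static void main(String[] args) {
        DateTimeFormatter dayFormatter = DateTimeFormatter.ofPattern("yyyyMMdd");
        LocalDate start = LocalDate.now().minusDays(1);  // yesterday
        LocalDate end = LocalDate.now().plusDays(7);     // a week from today
        // Nine iterations in total: start and end are both inclusive.
        for (LocalDate d = start; !d.isAfter(end); d = d.plusDays(1)) {
            System.out.println(d.format(dayFormatter));  // e.g. 20240101
        }
    }
}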
Logging into Hive with Kerberos authentication
public String createHiveTable(String[] fields, String dbUrl, DbSchema schema,
                              String tableName, String location, String delimiter) {
    // Log in with Kerberos before opening the JDBC connection. The paths and
    // the principal below are placeholders; point them at your own cluster files.
    File coreSite = new File("/dataload/collect/core-site.xml");
    File hdfsSite = new File("/dataload/collect/hdfs-site.xml");
    File keytab = new File("/dataload/collect/xxx.keytab");
    File krb5 = new File("/dataload/collect/krb5.conf");
    Configuration conf = new Configuration();
    boolean result = KerberosUtil.loginKerberos(conf, "xxx@DCOM.COM", keytab, krb5, coreSite, hdfsSite);
    System.out.println("kerberos auth result: " + result);
    // The remainder of the method -- loading the Hive JDBC driver, the
    // table-existence check, CREATE TABLE, and the partition pre-creation
    // loop -- is identical to the unauthenticated createHiveTable above.
}
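One caveat for the authenticated case: a successful keytab login is not enough on its own. For a kerberized HiveServer2, the JDBC URL must also carry the server principal, otherwise getConnection fails; the username and password arguments are ignored in this mode. A sketch with placeholder host, port, and realm:

// Placeholder host/port/realm; the principal parameter names the HiveServer2
// service principal, and _HOST is expanded to the server's hostname by the client.
String dbUrl = "jdbc:hive2://hive-host:10000/default;principal=hive/_HOST@DCOM.COM";
Connection connection = DriverManager.getConnection(dbUrl, "", "");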