1. Maven dependencies required by the project
Note: the versions of the Hive dependencies must match the versions of the Hive jars under the lib directory of your Hive installation, otherwise you may run into version-compatibility problems.
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.hl</groupId>
<artifactId>hive</artifactId>
<version>0.1</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.1.0</version>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>2.3.7</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>2.3.7</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>2.3.7</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
2. Java code and notes
package hive;
import org.junit.After;
import org.junit.Test;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
* @describe Connecting to Hive from Java via JDBC
* <p>
* Error: Permission denied: user=anonymous, access=WRITE, inode="/":root:supergroup:drwxrwxr-x
* Fix:
* 1. Disable HDFS permission checking: set dfs.permissions.enabled=false in hdfs-site.xml and restart HDFS
* 2. Or broaden the permissions on the target directory: hadoop fs -chmod 777 /
* Error: Column length too big for column 'PARAM_VALUE' (max = 21845); use BLOB or TEXT instead
* Fix: connect to MySQL and change the character set of the hive metastore database:
* ALTER DATABASE `hive`
* DEFAULT CHARACTER SET latin1
* DEFAULT COLLATE latin1_bin;
*/
public class HiveTest {
private static final String driverName = "org.apache.hive.jdbc.HiveDriver";
private static final String url = "jdbc:hive2://IP:10000";
private static final String dbName = "db_test";
private static Connection con = null;
private static Statement state = null;
private static ResultSet res = null;
/**
* Release resources
*/
@After
public void destroy() throws SQLException {
if (res != null) res.close();
if (state != null) state.close();
if (con != null) con.close();
}
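/*
* A minimal alternative sketch, not part of the original code: Connection,
* Statement and ResultSet are all AutoCloseable, so a query can use
* try-with-resources and skip the explicit cleanup above:
*
* try (Connection c = DriverManager.getConnection(url);
*      Statement s = c.createStatement();
*      ResultSet r = s.executeQuery("show databases")) {
*     while (r.next()) System.out.println(r.getString(1));
* }
*/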
/**
* List all databases
*/
@Test
public void showDb() throws SQLException, ClassNotFoundException {
Class.forName(driverName);
con = DriverManager.getConnection(url);
state = con.createStatement();
res = state.executeQuery("show databases");
while (res.next()) {
System.out.println(res.getString(1));
}
}
/**
* Create a database
*/
@Test
public void createDb() throws SQLException, ClassNotFoundException {
Class.forName(driverName);
con = DriverManager.getConnection(url);
state = con.createStatement();
state.execute("create database db_test");
}
public static void init() {
try {
Class.forName(driverName);
con = DriverManager.getConnection(url + "/" + dbName);
state = con.createStatement();
} catch (ClassNotFoundException | SQLException e) {
e.printStackTrace();
}
}
/**
* Drop a database
* DROP DATABASE IF EXISTS DbName CASCADE;
*/
@Test
public void dropDb() throws SQLException {
init();
state.execute("drop database if exists " + dbName + " CASCADE");
}
/*
* Managed (internal) table basics: create a table
* */
@Test
public void createTab() throws SQLException {
init();
state.execute("create table if not exists student ( " +
"name string , " +
"age int , " +
"agent string ," +
"adress struct<street:STRING,city:STRING>) " +
"row format delimited " +
"fields terminated by ',' " +//字段与字段之间的分隔符
"collection items terminated by ':'" +//一个字段各个item的分隔符
"lines terminated by '\n' ");//行分隔符
}
/**
* List all tables
*/
@Test
public void showTab() throws SQLException {
init();
res = state.executeQuery("show tables");
while (res.next()) {
System.out.println(res.getString(1));
}
}
/**
* Show the table structure
*/
@Test
public void descTab() throws SQLException {
init();
res = state.executeQuery("desc student");
while (res.next()) {
System.out.println(res.getString(1) + "\t" + res.getString(2));
}
}
/**
* Load data
*/
@Test
public void loadData() throws SQLException {
init();
state.execute("load data local inpath '/root/studentData.txt' overwrite into table student");
}
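/*
* A hypothetical example of what /root/studentData.txt could contain (the file is
* not shown in the original). Fields are separated by ',' and the street/city items
* of the struct by ':', matching the delimiters declared in createTab:
*
* tom,21,agent_a,nanjinglu:shanghai
* jack,23,agent_b,jinkelu:shanghai
*/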
/**
* Query data
*/
@Test
public void selectTab() throws SQLException {
init();
res = state.executeQuery("select * from student");
while (res.next()) {
System.out.println(res.getString(1) + "-" + res.getString(2) + "-" + res.getString(3) + "-" + res.getString(4));
}
}
/**
* Aggregate query (runs a MapReduce job, so it is comparatively expensive)
*/
@Test
public void countData() throws SQLException {
init();
res = state.executeQuery("select count(1) from student");
while (res.next()) {
System.out.println(res.getInt(1));
}
}
/**
* Drop the table
*/
@Test
public void dropTab() throws SQLException {
init();
state.execute("drop table student");
}
/**
* External table basics: create an external table
* After an external table is dropped, its data remains on HDFS; if an external table is recreated at the same path, the data is still there
* When no location is specified, the path given by hive.metastore.warehouse.dir is used by default
*/
@Test
public void createExTab() throws SQLException {
init();
state.execute("create external table if not exists student_ext ( " +
"name string , " +
"age int , " +
"agent string ," +
"adress struct<street:STRING,city:STRING>) " +
"row format delimited " +
"fields terminated by ',' " +
"collection items terminated by ':'" +
"lines terminated by '\n' " +
"stored as textfile " +
"location '/testData/hive/student1' ");
}
/**
* Copy the structure of an existing table; its data is not copied
* Create a table together with the data:  create table student1 as select * from student
* Create a table with only the structure: create table student1 like student
*/
@Test
public void copyExTab() throws SQLException {
init();
state.execute("create external table if not exists student2 like " +
"student location '/testData/hive/student1'");
}
/*
* Create a partitioned table; partitions must be declared in the table definition
* */
@Test
public void createParTab() throws SQLException {
init();
state.execute("create table if not exists emp (" +
"name string ," +
"salary int ," +
"subordinate array<string> ," +
"deductions map<string,float> ," +
"address struct<street:string,city:string>) " +
"partitioned by (city string,street string) " +
"row format delimited " +
"fields terminated by '\t' " +
"collection items terminated by ',' " +
"map keys terminated by ':' " +
"lines terminated by '\n' " +
"stored as textfile");
}
/**
* Add a partition
*/
@Test
public void addPartition() throws SQLException {
init();
state.execute("alter table emp add partition(city='shanghai',street='jinkelu') ");
}
/**
* Show the table's partitions, e.g. city=shanghai/street=jinkelu
*/
@Test
public void showPartition() throws SQLException {
init();
res = state.executeQuery("show partitions emp");
while (res.next()) {
System.out.println(res.getString(1));
}
}
/**
* Load data into a partition
*/
@Test
public void loadParData() throws SQLException {
init();
String filepath = " '/root/emp.txt' ";
state.execute("load data local inpath " + filepath + " overwrite into table emp partition (city='shanghai',street='jinkelu')");
}
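/*
* A hypothetical example of one line of /root/emp.txt (the file is not shown in the
* original). Fields are separated by a tab (shown as <TAB>), array and struct items
* by ',' and map entries by ':', matching the delimiters declared in createParTab;
* the partition columns city/street are not stored in the file:
*
* tom<TAB>8000<TAB>jack,mike<TAB>tax:200.0,insurance:400.0<TAB>jinkelu,shanghai
*/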
/**
* Drop a partition
*/
@Test
public void dropPartition() throws SQLException {
init();
state.execute("alter table employees drop partition (city='shanghai',street='jinkelu') ");
/*
* 1. Archive a partition into a HAR file:
*    alter table emp archive partition (city='shanghai',street='jinkelu')
* 2. Restore an archived partition back to a normal partition:
*    alter table emp unarchive partition (city='shanghai',street='jinkelu')
* 3. Protect a partition from being dropped:
*    alter table emp partition (city='shanghai',street='jinkelu') enable no_drop
* 4. Protect a partition from being queried:
*    alter table emp partition (city='shanghai',street='jinkelu') enable offline
* 5. Allow the partition to be dropped and queried again:
*    alter table emp partition (city='shanghai',street='jinkelu') disable no_drop
*    alter table emp partition (city='shanghai',street='jinkelu') disable offline
* */
}
/*
Dynamic partitions
When one statement must insert data into several partitions at once, dynamic partitioning
lets Hive route each row to a partition based on the values returned by the query.
The difference from a static partition insert is that the target partition is not named
explicitly; Hive chooses it from the actual data.
set hive.exec.dynamic.partition=true; -- enable dynamic partitioning
set hive.exec.dynamic.partition.mode=nonstrict -- partition mode; the default is strict
set hive.exec.max.dynamic.partitions=1000 -- maximum number of dynamic partitions, default 1000
A session-level helper for these settings is sketched below.
*/
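/*
* A minimal sketch, an addition not present in the original code: the three "set"
* commands above are per-session, so they must run on the same JDBC session as the
* dynamic-partition insert, e.g. call this right after init() and before the
* insert in loadPartitionData.
*/
private static void enableDynamicPartition() throws SQLException {
state.execute("set hive.exec.dynamic.partition=true"); // enable dynamic partitioning
state.execute("set hive.exec.dynamic.partition.mode=nonstrict"); // allow all partition columns to be dynamic
state.execute("set hive.exec.max.dynamic.partitions=1000"); // cap on partitions created per statement
}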
/**
* Create a partitioned table (same layout as emp)
*/
@Test
public void createParTab1() throws SQLException {
init();
state.execute("create table if not exists emp1 (" +
"name string ," +
"salary int ," +
"subordinate array<string> ," +
"deductions map<string,float> ," +
"address struct<street:string,city:string>) " +
"partitioned by (city string,street string) " +
"row format delimited " +
"fields terminated by '\t' " +
"collection items terminated by ',' " +
"map keys terminated by ':' " +
"lines terminated by '\n' " +
"stored as textfile");
}
/**
* Route rows to partitions based on the queried data (dynamic partition insert)
*/
@Test
public void loadPartitionData() throws SQLException {
init();
state.execute("insert overwrite table emp1 partition (city='shanghai',street) " +
"select name,salary,subordinate,deductions,address,address.street from emp");
}
}