大数据入门教程系列之Hive的Java API 操作

最新推荐文章于 2023-05-08 09:07:45 发布

zjh_746140129

最新推荐文章于 2023-05-08 09:07:45 发布

阅读量968

点赞数 1

分类专栏： hive 文章标签： java api操作hive

本文链接：https://blog.csdn.net/zjh_746140129/article/details/82812962

版权

hive 专栏收录该内容

11 篇文章

订阅专栏

Java 程序通过 JDBC 连接 HiveServer2 来访问 Hive。HiveServer2 是对之前 hive 服务的升级，功能更加强大，并增加了权限控制；它同时提供了一个新的命令行工具 beeline。要使用 beeline 需要先启动 hiveserver2，再使用 beeline 连接；本文先用 beeline 验证连接可用，再通过 Java API 操作 Hive。

操作步骤：

①、修改hadoop的core-site.xml配置文件

②、启动hadoop

③、启动hiveserver2

④、打开一个新窗口使用beeline连接(注意这里的javaapidb需要提前建立好)

⑤、新建java项目(maven)

详细步骤：

一、修改hadoop的core-site.xml配置文件

<!--
  Proxy-user (impersonation) settings for the OS user "hadoop", i.e. the
  <user> part of hadoop.proxyuser.<user>.hosts / .groups.
  The wildcard value "*" places no restriction on the hosts the proxy user may
  connect from, or on the groups whose members it may impersonate.
  NOTE(review): "*" is convenient for a tutorial cluster; confirm a narrower
  host/group list before using this in a shared environment.
-->
<property>
    <name>hadoop.proxyuser.hadoop.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.hadoop.groups</name>
    <value>*</value>
</property>

否则使用beeline连接时会报如下错

hadoop is not allowed to impersonate hadoop (state=08S01,code=0)

原因：hiveserver2增加了权限控制，需要在hadoop的配置文件中配置

解决方法：在hadoop的core-site.xml中添加上述内容，然后重启hadoop，再使用beeline连接即可

参考官网：

https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Superusers.html

二、启动hadoop

start-all.sh

三、启动hiveserver2

hiveserver2

jps看到这个进程说明启动成功了

四、打开一个新窗口使用beeline连接(注意这里的javaapidb需要提前建立好)

beeline -u jdbc:hive2://node1:10000/javaapidb -n hadoop -p

参数解释：

-u：连接url，可以使用IP，也可以使用主机名，端口默认为10000

-n：连接的用户名（注：不是登录hive的用户名，是hive所在服务器登录用户名）

-p：密码，可以不用输入

启动中密码处可以直接回车，启动后我们查询一下数据库即可看到是否启动成功

五、新建java项目(maven)

1、修改pom


<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the Hive JDBC example project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates. -->
    <groupId>com.demo</groupId>
    <artifactId>java-api-hive</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <!-- Read and write source files as UTF-8 regardless of the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- HiveServer2 JDBC driver (provides org.apache.hive.jdbc.HiveDriver). -->
        <!-- NOTE(review): presumably this version should match the Hive version on the cluster; confirm. -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>2.3.0</version>
        </dependency>

        <!-- JUnit 4, used for the @Before/@Test/@After annotations in the example class. -->
        <!-- NOTE(review): no <scope> is declared, so this is on the compile classpath; -->
        <!-- consider test scope if the example class lives under src/test/java. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.9</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Compile for Java 8 source and bytecode level. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

2、创建测试类HiveJDBC，代码如下

官网参考：https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients

下面的代码演示了创建数据库、建表、加载数据、查询、统计以及删除表和数据库等操作

完整代码：

package hiveapi;

/**
 * Created by zhoujh on 2018/8/15.
 */


import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.sql.*;

/**
 * JUnit 4 examples that operate on Hive through the HiveServer2 JDBC driver.
 *
 * <p>HiveServer2 must be started before running these tests, and the database
 * named in {@code URL} must already exist. Every test gets its own connection
 * and statement: {@link #init()} opens them and {@link #destory()} closes them.
 *
 * <p>The tests are independent examples; JUnit does not guarantee the order in
 * which they run, so run them individually when one depends on the effect of
 * another (for example {@code createTable} before {@code loadData}).
 */
public class HiveJDBC {

    /** Fully qualified class name of the HiveServer2 JDBC driver. */
    private static final String DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";
    /** JDBC URL of the HiveServer2 instance and the database to connect to. */
    private static final String URL = "jdbc:hive2://node1:10000/javaapidb";
    /** User name passed to HiveServer2. */
    private static final String USER = "hadoop";
    /** Password passed to HiveServer2 (empty in this setup). */
    private static final String PASSWORD = "";
    /** Database created by {@link #createDatabase()} and removed by {@link #dropDatabase()}. */
    private static final String TEST_DATABASE = "testdb";

    // Instance fields: JUnit creates a fresh HiveJDBC object per test method,
    // so no connection state leaks from one test into the next.
    private Connection conn;
    private Statement stmt;

    /**
     * Loads the driver and opens the connection and statement used by the test.
     *
     * @throws Exception if the driver class is missing or the connection fails
     */
    @Before
    public void init() throws Exception {
        Class.forName(DRIVER_NAME);
        conn = DriverManager.getConnection(URL, USER, PASSWORD);
        stmt = conn.createStatement();
    }

    /**
     * Creates the test database.
     *
     * @throws Exception if the statement fails
     */
    @Test
    public void createDatabase() throws Exception {
        // The original statement "create testdb" omitted the DATABASE keyword and
        // is rejected by Hive; IF NOT EXISTS keeps the test re-runnable.
        run("create database if not exists " + TEST_DATABASE);
    }

    /**
     * Lists all databases.
     *
     * @throws Exception if the query fails
     */
    @Test
    public void showDatabases() throws Exception {
        printFirstColumn("show databases");
    }

    /**
     * Creates the tab-delimited {@code emp} table.
     *
     * @throws Exception if the statement fails
     */
    @Test
    public void createTable() throws Exception {
        String sql = "create table emp(\n" +
                "empno int,\n" +
                "ename string,\n" +
                "job string,\n" +
                "mgr int,\n" +
                "hiredate string,\n" +
                "sal double,\n" +
                "comm double,\n" +
                "deptno int\n" +
                ")\n" +
                "row format delimited fields terminated by '\\t'";
        run(sql);
    }

    /**
     * Lists all tables in the connected database.
     *
     * @throws Exception if the query fails
     */
    @Test
    public void showTables() throws Exception {
        printFirstColumn("show tables");
    }

    /**
     * Prints the first two columns of {@code desc emp} for every row returned.
     *
     * @throws Exception if the query fails
     */
    @Test
    public void descTable() throws Exception {
        String sql = "desc emp";
        System.out.println("Running: " + sql);
        try (ResultSet rows = stmt.executeQuery(sql)) {
            while (rows.next()) {
                System.out.println(rows.getString(1) + "\t" + rows.getString(2));
            }
        }
    }

    /**
     * Loads a local file into {@code emp}, replacing the existing contents.
     *
     * @throws Exception if the statement fails
     */
    @Test
    public void loadData() throws Exception {
        String filePath = "/home/hadoop/data/emp.txt";
        run("load data local inpath '" + filePath + "' overwrite into table emp");
    }

    /**
     * Selects every row of {@code emp} and prints employee number, name and job.
     *
     * @throws Exception if the query fails
     */
    @Test
    public void selectData() throws Exception {
        String sql = "select * from emp";
        System.out.println("Running: " + sql);
        try (ResultSet rows = stmt.executeQuery(sql)) {
            System.out.println("员工编号" + "\t" + "员工姓名" + "\t" + "工作岗位");
            while (rows.next()) {
                System.out.println(rows.getString("empno") + "\t\t" + rows.getString("ename") + "\t\t" + rows.getString("job"));
            }
        }
    }

    /**
     * Counts the rows of {@code emp} (an aggregate query, which the original
     * author notes runs a MapReduce job).
     *
     * @throws Exception if the query fails
     */
    @Test
    public void countData() throws Exception {
        String sql = "select count(1) from emp";
        System.out.println("Running: " + sql);
        try (ResultSet rows = stmt.executeQuery(sql)) {
            while (rows.next()) {
                // count() yields a BIGINT in Hive, so read it as long.
                System.out.println(rows.getLong(1));
            }
        }
    }

    /**
     * Drops the database created by {@link #createDatabase()}.
     *
     * @throws Exception if the statement fails
     */
    @Test
    public void dropDatabase() throws Exception {
        // Previously dropped "hive_jdbc_test", a database this class never creates.
        run("drop database if exists " + TEST_DATABASE);
    }

    /**
     * Drops the {@code emp} table. (Method name spelling kept as-is so existing
     * references to this test keep working.)
     *
     * @throws Exception if the statement fails
     */
    @Test
    public void deopTable() throws Exception {
        run("drop table if exists emp");
    }

    /**
     * Closes the statement and the connection opened by {@link #init()}.
     * (Method name spelling kept as-is.)
     *
     * @throws Exception if closing either resource fails
     */
    @After
    public void destory() throws Exception {
        // Nested try/finally: a failure closing the statement must not prevent
        // the connection from being closed.
        try {
            if (stmt != null) {
                stmt.close();
            }
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }

    /** Logs and executes a statement that produces no result set of interest. */
    private void run(String sql) throws SQLException {
        System.out.println("Running: " + sql);
        stmt.execute(sql);
    }

    /** Logs and executes a query, printing the first column of each row. */
    private void printFirstColumn(String sql) throws SQLException {
        System.out.println("Running: " + sql);
        try (ResultSet rows = stmt.executeQuery(sql)) {
            while (rows.next()) {
                System.out.println(rows.getString(1));
            }
        }
    }
}