This post wraps some basic Hive operations in a simple utility class. It was not built as a Maven project originally; for the required JARs and for how to obtain a Hive Connection, see the previous post: http://my.oschina.net/gently/blog/683604
1. First, configure the properties for connecting to Hive by creating hivedb.properties as follows:
driver=org.apache.hive.jdbc.HiveDriver
url=jdbc:hive2://master:10000/default
user=hive
password=xujun
2. Create HiveProperties, a singleton that extends Properties and reads hivedb.properties:
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

public class HiveProperties extends Properties {
    /**
     * @author Berg
     * Loads hivedb.properties from the classpath (singleton).
     */
    private static final long serialVersionUID = 1L;
    private static HiveProperties hiveProperties;

    private HiveProperties() {
        // try-with-resources closes the stream even if load() throws
        try (InputStream is = HiveProperties.class.getClassLoader()
                .getResourceAsStream("hivedb.properties")) {
            super.load(is);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static synchronized HiveProperties getInstance() {
        if (hiveProperties == null) {
            hiveProperties = new HiveProperties();
        }
        return hiveProperties;
    }
}
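For reference, a minimal usage sketch (the key names match hivedb.properties above):

// getProperty returns null if the key is missing from the properties file
String url = HiveProperties.getInstance().getProperty("url");
System.out.println("Hive JDBC URL: " + url);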
3. Create HiveUtil, a simple wrapper around basic Hive operations.
Note: data can be loaded into a table either from a local file or from a file on HDFS;
the only difference is that the LOCAL keyword is omitted when the source file is on HDFS, as illustrated below.
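As a quick illustration, the two statement forms differ only in the LOCAL keyword (the paths are the illustrative ones used later in this post):

-- load from the local filesystem of the machine running Hive (file is copied)
load data local inpath '/home/hadoop/mytestdata/hive.txt' into table testhive;
-- load from HDFS: no LOCAL keyword (file is moved into the table directory)
load data inpath 'hdfs://master:9000/user/hive/hive.txt' into table testhive;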
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class HiveUtil {
    // Register the JDBC driver once, in a static block.
    static {
        try {
            Class.forName(HiveProperties.getInstance().getProperty("driver"));
        } catch (ClassNotFoundException e) {
            e.printStackTrace(); // goes to standard error
        }
    }

    // Obtain a Hive Connection.
    public Connection getConn() throws SQLException {
        HiveProperties hp = HiveProperties.getInstance();
        return DriverManager.getConnection(hp.getProperty("url"),
                hp.getProperty("user"), hp.getProperty("password"));
    }

    // Create a table.
    public boolean createTable(Statement statement, String sql) throws SQLException {
        /**
         * Statement.execute() returns true if the first result is a ResultSet,
         * and false if it is an update count or there is no result.
         * CREATE is DDL, not a query, so execute() returns false on success,
         * and we invert it. Admittedly, this is a somewhat crude success check.
         */
        return !statement.execute(sql);
    }

    // Check whether a table exists.
    public boolean tableIsExists(Statement statement, String tableName) throws SQLException {
        String sql = "show tables '" + tableName + "'";
        ResultSet rs = statement.executeQuery(sql);
        return rs.next();
    }

    // Drop a table.
    public boolean dropTable(Statement statement, String tableName) throws SQLException {
        String sql = "drop table " + tableName;
        return !statement.execute(sql);
    }

    // Show one table, or all tables if tableName is null or empty.
    public ResultSet showTables(Statement statement, String tableName) throws SQLException {
        String sql;
        if (tableName == null || tableName.isEmpty()) {
            sql = "show tables";
        } else {
            sql = "show tables '" + tableName + "'";
        }
        return statement.executeQuery(sql);
    }

    // Describe a table.
    public ResultSet describeTable(Statement statement, String tableName) throws SQLException {
        return statement.executeQuery("describe " + tableName);
    }

    /**
     * Load data into a table from a local file.
     * @param filePath file path, e.g. /home/hadoop/mytestdata/hive.txt
     *                 Note: this path is on the (virtual) machine running Hive.
     */
    public boolean loadDataFromLocal(Statement statement, String tableName, String filePath) throws SQLException {
        String sql = "load data local inpath '" + filePath + "' into table " + tableName;
        return !statement.execute(sql);
    }

    // Load data into a table from a file on HDFS (no LOCAL keyword).
    public boolean loadDataFromHDFS(Statement statement, String tableName, String filePath) throws SQLException {
        String sql = "load data inpath '" + filePath + "' into table " + tableName;
        return !statement.execute(sql);
    }

    // Query all rows of a table.
    public ResultSet selectData(Statement statement, String tableName) throws SQLException {
        return statement.executeQuery("select * from " + tableName);
    }

    // Run a counting query, e.g. select count(1) from t.
    public ResultSet countData(Statement statement, String sql) throws SQLException {
        return statement.executeQuery(sql);
    }

    // Close resources: the Statement first, then the Connection.
    public void close(Connection conn, Statement statement) throws SQLException {
        if (statement != null) {
            statement.close();
        }
        if (conn != null) {
            conn.close();
        }
    }
}
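A minimal end-to-end sketch of how these helpers chain together (the table name is illustrative):

HiveUtil hu = new HiveUtil();
Connection conn = hu.getConn();
Statement stmt = conn.createStatement();
if (hu.tableIsExists(stmt, "testhive")) {
    ResultSet rs = hu.selectData(stmt, "testhive");
    while (rs.next()) {
        System.out.println(rs.getString(1) + "\t" + rs.getString(2));
    }
}
hu.close(conn, stmt); // always release resources when done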
4. Test the basic operations with a TestMain class:
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.junit.After;
import org.junit.Test;

public class TestMain {
    private HiveUtil hu = new HiveUtil();
    private Connection conn = null;
    private Statement statement;

    // Release JDBC resources after each test.
    @After
    public void tearDown() throws SQLException {
        hu.close(conn, statement);
    }

    // 1. Test the connection.
    @Test
    public void testConn() throws SQLException {
        conn = hu.getConn();
        System.out.println("Connection: " + conn);
    }

    // 2. Create a table.
    @Test
    public void testCreateTable() throws SQLException {
        conn = hu.getConn();
        statement = conn.createStatement();
        String tableName = "testhive";
        String sql = "create table " + tableName + " (key int, value string) row format delimited fields terminated by '\t'";
        boolean result = hu.createTable(statement, sql);
        if (result) {
            System.out.println("Table created. " + result);
        } else {
            System.out.println("Failed to create table. " + result);
        }
    }

    // 3. Check whether the table exists.
    @Test
    public void testTableIsExists() throws SQLException {
        conn = hu.getConn();
        statement = conn.createStatement();
        String tableName = "testhive";
        if (hu.tableIsExists(statement, tableName)) {
            System.out.println("Table already exists.");
        } else {
            System.out.println("Table does not exist; please create it first.");
        }
    }

    // 4. Drop the table, checking first that it exists.
    @Test
    public void testDropTable() throws SQLException {
        conn = hu.getConn();
        statement = conn.createStatement();
        String tableName = "testhive";
        if (hu.tableIsExists(statement, tableName)) {
            System.out.println("Table exists; dropping it:");
            if (hu.dropTable(statement, tableName)) {
                System.out.println("Table dropped.");
            } else {
                System.out.println("Failed to drop table.");
            }
        } else {
            System.out.println("Table does not exist; please create it first.");
        }
    }

    // 5. Show tables.
    @Test
    public void testShowTables() throws SQLException {
        conn = hu.getConn();
        statement = conn.createStatement();
        String tableName = "testhive";
        ResultSet rs = hu.showTables(statement, tableName);
        while (rs.next()) {
            System.out.println("Table name: " + rs.getString(1));
        }
    }

    // 6. Describe the table.
    @Test
    public void testDescribeTable() throws SQLException {
        conn = hu.getConn();
        statement = conn.createStatement();
        String tableName = "testhive";
        // Check that the table exists first.
        if (hu.tableIsExists(statement, tableName)) {
            ResultSet rs = hu.describeTable(statement, tableName);
            while (rs.next()) {
                System.out.println(rs.getString(1));
            }
        } else {
            System.out.println("Error: table does not exist, cannot describe it.");
        }
    }

    // 7. Load data: local and HDFS.
    @Test
    public void testLoadData() throws SQLException {
        conn = hu.getConn();
        statement = conn.createStatement();
        String tableName = "testhive";
        //String localFilePath = "/home/hadoop/mytestdata/hive.txt";
        String hdfsFilePath = "hdfs://master:9000/user/hive/hive.txt";
        // Check that the table exists first.
        if (hu.tableIsExists(statement, tableName)) {
            // Load from a local file:
            //boolean result = hu.loadDataFromLocal(statement, tableName, localFilePath);
            // Load from a file on HDFS:
            boolean result = hu.loadDataFromHDFS(statement, tableName, hdfsFilePath);
            if (result) {
                System.out.println("Data loaded into table " + tableName + ".");
            } else {
                System.out.println("Load failed.");
            }
        } else {
            System.out.println("Error: table does not exist, cannot load data into it.");
        }
    }

    // 8. Query the data in the table.
    @Test
    public void testSelectData() throws SQLException {
        conn = hu.getConn();
        statement = conn.createStatement();
        String tableName = "testhive";
        // Check that the table exists first.
        if (hu.tableIsExists(statement, tableName)) {
            ResultSet rs = hu.selectData(statement, tableName);
            while (rs.next()) {
                System.out.println(rs.getString(1) + "\t" + rs.getString(2));
            }
        } else {
            System.out.println("Error: table does not exist, cannot query it.");
        }
    }

    // 9. Count the rows in the table.
    @Test
    public void testCountData() throws SQLException {
        conn = hu.getConn();
        statement = conn.createStatement();
        String tableName = "testhive";
        // Check that the table exists first.
        if (hu.tableIsExists(statement, tableName)) {
            String sql = "select count(1) from " + tableName;
            ResultSet rs = hu.countData(statement, sql);
            while (rs.next()) {
                System.out.println("Current row count: " + rs.getString(1));
            }
        } else {
            System.out.println("Error: table does not exist, cannot count rows.");
        }
    }
}
5. Test data (tab-separated, matching the fields terminated by '\t' clause in the CREATE TABLE statement above):
1 Berg
1 Berg
2 Cccc
3 Xxxx
4 Jjjj
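When loading from HDFS in testLoadData, the file must already exist at the HDFS path used above. A minimal, hypothetical sketch using the Hadoop FileSystem API (provided by the hadoop-common dependency below; the class name UploadTestData and both paths are just the illustrative values from this post):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class UploadTestData {
    public static void main(String[] args) throws Exception {
        // Copy the local test file into HDFS so that loadDataFromHDFS can find it.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("hdfs://master:9000"), conf);
        fs.copyFromLocalFile(new Path("/home/hadoop/mytestdata/hive.txt"),
                new Path("/user/hive/hive.txt"));
        fs.close();
    }
}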
Of course, you can also set this up as a Maven project; my dependencies are as follows:
<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>log4j</groupId>
        <artifactId>log4j</artifactId>
        <version>1.2.17</version>
    </dependency>
    <dependency>
        <groupId>jstl</groupId>
        <artifactId>jstl</artifactId>
        <version>1.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.6.4</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.4</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>2.0.0</version>
        <exclusions>
            <exclusion>
                <groupId>org.pentaho</groupId>
                <artifactId>pentaho-aggdesigner-algorithm</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-jdbc</artifactId>
        <version>2.0.0</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.5</version>
    </dependency>
</dependencies>
These dependencies also passed the tests above.