Part 1:
I won't go over installing Hadoop and Hive here; there are plenty of articles online, so look those up yourself.
First, start the HiveServer service on the machine:

hive --service hiveserver -p 50000 &
This opens port 50000, and Java can then connect to it over JDBC. The jar packages you need are shown in the picture; that's the full set, and every one of them is required.
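(If the picture is not available, a typical dependency set for the old HiveServer JDBC client, assuming roughly the same versions as listed in Part 2 below, looks like this; treat it as a starting point rather than an exact list: hadoop-common (or hadoop-core on Hadoop 1.x), hive-jdbc, hive-exec, hive-metastore, hive-service, libfb303, commons-logging, and slf4j-api.)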
Enough talk; here is the code:
package asia.wildfire.hive.service;

import java.sql.*;
import java.sql.Date;
import java.text.SimpleDateFormat;
import java.util.*;

/**
 * User: liuxiaochen
 * Date: 13-9-24
 * Time: 5:47 PM
 */
public class HiveService {

    private static final String URLHIVE = "jdbc:hive://ip:50000/default";
    private static Connection connection = null;

    /**
     * Lazily creates a single shared connection to HiveServer
     * (double-checked locking on the class object).
     */
    public static Connection getHiveConnection() {
        if (null == connection) {
            synchronized (HiveService.class) {
                if (null == connection) {
                    try {
                        Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
                        connection = DriverManager.getConnection(URLHIVE, "", "");
                    } catch (SQLException e) {
                        e.printStackTrace();
                    } catch (ClassNotFoundException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
        return connection;
    }

    /**
     * Drops and recreates three external tables backed by DynamoDB.
     */
    public static void createTable() throws SQLException {
        String tweetTableSql = "DROP TABLE IF EXISTS hive_crm_tweet2222";
        String createTable1 = "CREATE EXTERNAL TABLE hive_crm_tweet2222(tweet_id string, cuser_id string, created_at bigint, year bigint, month bigint, day bigint, hour bigint, text string, comments_count bigint, reposts_count bigint, source string, retweeted_id string, post_type string, sentiment string, positive_tags_string string, predict_tags_string string, tags_string string) STORED BY 'org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler' TBLPROPERTIES (\"dynamodb.table.name\" = \"crm_tweet\",\"dynamodb.column.mapping\" = \"tweet_id:tweet_id,cuser_id:cuser_id,created_at:created_at,year:year,month:month,day:day,hour:hour,text:text,comments_count:comments_count,reposts_count:reposts_count,source:source,retweeted_id:retweeted_id,post_type:post_type,sentiment:sentiment,positive_tags_string:positive_tags_string,predict_tags_string:predict_tags_string,tags_string:tags_string\")";
        String commentTableSql = "DROP TABLE IF EXISTS hive_tweet_comment2222";
        String createTable2 = "CREATE EXTERNAL TABLE hive_tweet_comment2222(tweet_id string,comment_id string, cuser_id string, user_id string, created_at bigint, year bigint, month bigint, day bigint, hour bigint, text string, comments_count bigint, reposts_count bigint, source string, topic_id string, post_type string, sentiment string) STORED BY 'org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler' TBLPROPERTIES (\"dynamodb.table.name\" = \"crm_tweet_comment\",\"dynamodb.column.mapping\" = \"tweet_id:tweet_id,comment_id:comment_id,cuser_id:cuser_id,user_id:user_id,created_at:created_at,year:year,month:month,day:day,hour:hour,text:text,comments_count:comments_count,reposts_count:reposts_count,source:source,topic_id:tweet_id,post_type:post_type,sentiment:sentiment\")";
        String retweetTableSql = "DROP TABLE IF EXISTS hive_tweet_retweet2222";
        String createTable3 = "CREATE EXTERNAL TABLE hive_tweet_retweet2222(tweet_id string, cuser_id string, user_id string, retweet_id string, created_at BIGINT, year BIGINT, month BIGINT, day BIGINT, hour BIGINT, text string, comments_count BIGINT, reposts_count BIGINT, source string, topic_id string, verified_type BIGINT, post_type string, sentiment string) STORED BY 'org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler' TBLPROPERTIES (\"dynamodb.table.name\" = \"crm_tweet_retweet\",\"dynamodb.column.mapping\" = \"tweet_id:tweet_id,cuser_id:cuser_id,user_id:user_id,retweet_id:retweet_id,created_at:created_at,year:year,month:month,day:day,hour:hour,text:text,comments_count:comments_count,reposts_count:reposts_count,source:source,topic_id:tweet_id,verified_type:verified_type,post_type:post_type,sentiment:sentiment\")";

        // DDL statements return no result set, so use execute() rather than executeQuery()
        Statement stmt = getHiveConnection().createStatement();
        stmt.execute(tweetTableSql);
        stmt.execute(createTable1);
        stmt.execute(commentTableSql);
        stmt.execute(createTable2);
        stmt.execute(retweetTableSql);
        stmt.execute(createTable3);
    }

    /**
     * Counts one user's tweets per hour over a two-day window.
     * DateUtils is a project-specific date helper (a sketch follows this class).
     */
    public static void selectTweet() throws SQLException {
        long aaa = System.currentTimeMillis();
        long start = DateUtils.getNDaysAgo(DateUtils.getMidNight(), 15).getTime().getTime();
        long end = DateUtils.getNDaysAgo(DateUtils.getMidNight(), 13).getTime().getTime();
        String sql = "select cuser_id, count(*) as tw_hour, year, month, day from hive_crm_tweet2222 where created_at > ? and created_at < ? and cuser_id = ? group by cuser_id, year, month, day, hour";
        PreparedStatement pstm = getHiveConnection().prepareStatement(sql);
        pstm.setLong(1, start);
        pstm.setLong(2, end);
        pstm.setString(3, "2176270443");
        ResultSet rss = pstm.executeQuery();
        while (rss.next()) {
            System.out.println("1: " + rss.getString("cuser_id") + " 2: " + rss.getInt("tw_hour") + " 3: " + rss.getInt("year") + " 4: " + rss.getInt("month") + " 5: " + rss.getInt("day"));
        }
        System.out.println(System.currentTimeMillis() - aaa);
    }

    /**
     * Lists one user's tweet ids and creation times over the same window.
     */
    public static void selectTweet22() throws SQLException {
        long aaa = System.currentTimeMillis();
        long start = DateUtils.getNDaysAgo(DateUtils.getMidNight(), 15).getTime().getTime();
        long end = DateUtils.getNDaysAgo(DateUtils.getMidNight(), 13).getTime().getTime();
        String sql = "select cuser_id, created_at, tweet_id from hive_crm_tweet2222 where created_at > ? and created_at < ? and cuser_id = ?";
        PreparedStatement pstm = getHiveConnection().prepareStatement(sql);
        pstm.setLong(1, start);
        pstm.setLong(2, end);
        pstm.setString(3, "2176270443");
        ResultSet rss = pstm.executeQuery();
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH");
        while (rss.next()) {
            // created_at is stored in seconds; convert to milliseconds
            long cc = rss.getLong("created_at") * 1000L;
            java.util.Date date = new java.util.Date(cc);
            System.out.println(dateFormat.format(date));
            System.out.println(rss.getString("cuser_id") + " " + rss.getString("tweet_id"));
        }
        System.out.println(System.currentTimeMillis() - aaa);
    }

    public static void main(String[] args) throws ClassNotFoundException, SQLException {
        // Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
        // String querySQL = "SELECT a.* FROM test_time a";
        //
        // Connection con = DriverManager.getConnection(URLHIVE, "", "");
        // Statement stmt = con.createStatement();
        // ResultSet res = stmt.executeQuery(querySQL); // run the query
        //
        // while (res.next()) {
        //     System.out.println("Result: key:" + res.getString(1) + " –> value:" + res.getString(2));
        // }
        selectTweet22();
        // SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH");
        // System.out.println(dateFormat.format(new java.util.Date()));
    }
}
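The code above relies on a DateUtils helper that the original post does not show. As a minimal sketch of what it might look like (this is an assumption, not the author's actual implementation; only getMidNight and getNDaysAgo are needed):

package asia.wildfire.hive.service;

import java.util.Calendar;

/**
 * Hypothetical sketch of the DateUtils helper used by HiveService.
 */
public class DateUtils {

    /** Returns a Calendar set to today's midnight (00:00:00.000). */
    public static Calendar getMidNight() {
        Calendar cal = Calendar.getInstance();
        cal.set(Calendar.HOUR_OF_DAY, 0);
        cal.set(Calendar.MINUTE, 0);
        cal.set(Calendar.SECOND, 0);
        cal.set(Calendar.MILLISECOND, 0);
        return cal;
    }

    /** Returns a copy of the given Calendar moved n days into the past. */
    public static Calendar getNDaysAgo(Calendar from, int n) {
        Calendar cal = (Calendar) from.clone();
        cal.add(Calendar.DAY_OF_MONTH, -n);
        return cal;
    }
}

With this helper, DateUtils.getNDaysAgo(DateUtils.getMidNight(), 15).getTime().getTime() yields the epoch milliseconds of midnight fifteen days ago, which is what the prepared statements above bind as the window boundaries.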
Part 2:
Hive offers several user interfaces for talking to it: the CLI, the Client, and the Web UI. Of the three, the CLI is by far the most commonly used. The Client is Hive's client library, through which a user connects to a Hive Server; when you start in Client mode you have to specify the node where the Hive Server lives and start the Hive Server on that node. The WUI lets you access Hive through a browser. Today we will look at how to work with Hive through HiveServer.
Hive ships with a JDBC driver, so we can connect to Hive from Java code and run SQL-style queries much as we would against a relational database. As with a relational database, the Hive service has to be running first. Before Hive 0.11.0 only the HiveServer service was available, so before your program can work with Hive you must start HiveServer on the machine where Hive is installed, like this:
[wyp@localhost /home/q/hive-0.11.0]$ bin/hive --service hiveserver -p 10002
Starting Hive Thrift Server
The output above means you have successfully started the hiveserver service on port 10002 (the default port is 10000). Now you can connect to hiveserver from Java code, as follows:
package com.wyp;

/**
 * User: 过往记忆
 * Blog: http://www.iteblog.com/
 * Date: 13-11-27
 * Time: 5:52 PM
 */
import java.sql.SQLException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.sql.DriverManager;

public class HiveJdbcTest {

    private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";

    public static void main(String[] args) throws SQLException {
        try {
            Class.forName(driverName);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            System.exit(1);
        }

        // Connect to the HiveServer started above on port 10002;
        // adjust host, port and credentials to your own setup
        Connection con = DriverManager.getConnection("jdbc:hive://localhost:10002/default", "", "");
        Statement stmt = con.createStatement();
        String tableName = "wyphao";
        stmt.execute("drop table if exists " + tableName);
        stmt.execute("create table " + tableName + " (key int, value string)");
        System.out.println("Create table success!");

        // show tables
        String sql = "show tables '" + tableName + "'";
        System.out.println("Running: " + sql);
        ResultSet res = stmt.executeQuery(sql);
        if (res.next()) {
            System.out.println(res.getString(1));
        }

        // describe table
        sql = "describe " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        while (res.next()) {
            System.out.println(res.getString(1) + "\t" + res.getString(2));
        }

        // select all rows
        sql = "select * from " + tableName;
        res = stmt.executeQuery(sql);
        while (res.next()) {
            System.out.println(String.valueOf(res.getInt(1)) + "\t" + res.getString(2));
        }

        // count rows
        sql = "select count(1) from " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        while (res.next()) {
            System.out.println(res.getString(1));
        }
    }
}
Compile the code above and run it (I ran this program from my IDE); the output looks like this:
Create table success!
Running: show tables 'wyphao'
wyphao
Running: describe wyphao
key     int
value   string
Running: select count(1) from wyphao
0

Process finished with exit code 0
If you want to run the program from the command line instead of the IDE, you can build the classpath from the required jars with a script along these lines:

#!/bin/bash
HADOOP_HOME=/home/q/hadoop-2.2.0
HIVE_HOME=/home/q/hive-0.11.0-bin

CLASSPATH=$CLASSPATH:

for i in /home/wyp/lib/*.jar ; do
    CLASSPATH=$CLASSPATH:$i
done

echo $CLASSPATH

/home/q/java/jdk1.6.0_20/bin/java -cp \
  $CLASSPATH:/export1/tmp/yangping.wu/OutputText.jar com.wyp.HiveJdbcTest
The code above connects to HiveServer from Java, but HiveServer itself has a number of problems (for example, with security and concurrency). To address them, Hive 0.11.0 introduced a brand-new service, HiveServer2, which resolves HiveServer's security and concurrency issues nicely. Its startup script lives at ${HIVE_HOME}/bin/hiveserver2, and you can start the HiveServer2 service like this:
$HIVE_HOME/bin/hiveserver2
You can also start HiveServer2 this way:
$HIVE_HOME/bin/hive --service hiveserver2
Both ways have the same effect. The earlier program, however, needs two changes: the driver class name, and the JDBC URL scheme (jdbc:hive:// becomes jdbc:hive2://; keep the host and port of your own HiveServer2), as shown below:
private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";

changes to

private static String driverName = "org.apache.hive.jdbc.HiveDriver";

Connection con = DriverManager.getConnection("jdbc:hive://localhost:10002/default", "", "");

changes to

Connection con = DriverManager.getConnection("jdbc:hive2://localhost:10002/default", "", "");
Everything else can stay the same.
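To make the change concrete, here is a minimal, self-contained sketch of a HiveServer2 connection. The host, port, database, and empty credentials are assumptions carried over from the example above; adjust them to your environment:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class HiveServer2JdbcTest {
    public static void main(String[] args) throws Exception {
        // HiveServer2 uses a different driver class and the hive2 URL scheme
        Class.forName("org.apache.hive.jdbc.HiveDriver");

        // Host, port and credentials are placeholders; adjust to your setup
        Connection con = DriverManager.getConnection(
                "jdbc:hive2://localhost:10002/default", "", "");
        Statement stmt = con.createStatement();

        // A simple smoke test: list the tables in the default database
        ResultSet res = stmt.executeQuery("show tables");
        while (res.next()) {
            System.out.println(res.getString(1));
        }

        res.close();
        stmt.close();
        con.close();
    }
}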
While we are at it, here are the jar packages this program depends on:
hadoop-2.2.0/share/hadoop/common/hadoop-common-2.2.0.jar
$HIVE_HOME/lib/hive-exec-0.11.0.jar
$HIVE_HOME/lib/hive-jdbc-0.11.0.jar
$HIVE_HOME/lib/hive-metastore-0.11.0.jar
$HIVE_HOME/lib/hive-service-0.11.0.jar
$HIVE_HOME/lib/libfb303-0.9.0.jar
$HIVE_HOME/lib/commons-logging-1.0.4.jar
$HIVE_HOME/lib/slf4j-api-1.6.1.jar
If you use Maven, add the following dependencies instead:
<dependency>
  <groupId>org.apache.hive</groupId>
  <artifactId>hive-jdbc</artifactId>
  <version>0.11.0</version>
</dependency>

<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-common</artifactId>
  <version>2.2.0</version>
</dependency>