本文所做示例是将hive表格中的数据读取出来并存入mysql数据库。前提是以mysql作为hive的元数据库已经调通,因为连接mysql数据库时所用的数据库名、用户名和密码都要与hive的配置文件保持一致。
1、在eclipse下建立工程,并将1)hive中lib目录下的所有包都导入到该项目。2)将hadoop中的相关包导入。在hadoop-2.x以前只要导入hadoop-xxx-core.jar就可以;hadoop-2.x以后的版本中,相关jar包被分散在hadoop安装目录的share/hadoop/下的各个子目录里,以防万一建议全部导入。3)导入mysql的jdbc驱动包。
2、该项目第一个文件叫做ConnectDB,里面两个方法getHiveConnection和getMysqlConnection,分别返回连接到hive或者mysql数据库的一个connection。主要是在Class.forName中导入驱动文件和在getConnection获取连接的时候要对应相应的数据库,要与hive中的配置文件保持一致。
package com.scutcv.mrshen;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.nullCondition_return;
import org.codehaus.groovy.ast.stmt.ThrowStatement;
/**
 * Lazily creates and caches singleton JDBC connections to Hive and to MySQL.
 * The URLs, user names and passwords must match the Hive server configuration
 * (hive-site.xml) for the transfer in hiveUtil to work.
 */
public class ConnectDB {

    /** Cached connection to Hive; null until first requested or after closeHive(). */
    private static Connection connection = null;
    /** Cached connection to MySQL; null until first requested or after closeMysql(). */
    private static Connection connection2mysql = null;

    /** Utility class: not instantiable. */
    private ConnectDB() {
    }

    /**
     * Returns the shared Hive connection, creating it on first use.
     *
     * @return an open JDBC connection to the Hive server
     * @throws SQLException if the connection cannot be established
     */
    public static Connection getHiveConnection() throws SQLException {
        if (connection == null) {
            try {
                Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
            } catch (ClassNotFoundException e) {
                // The Hive JDBC driver jar is missing from the classpath;
                // nothing else can work, so bail out immediately.
                e.printStackTrace();
                System.exit(1);
            }
            // Port must match the one the Hive server was started with.
            connection = DriverManager.getConnection(
                    "jdbc:hive://master:50031/default", "hive", "hive");
            System.out.println("to hive: " + connection.toString());
        }
        return connection;
    }

    /**
     * Returns the shared MySQL connection, creating it on first use.
     *
     * @return an open JDBC connection to the MySQL server
     * @throws SQLException if the connection cannot be established
     */
    public static Connection getMysqlConnection() throws SQLException {
        if (connection2mysql == null) {
            try {
                Class.forName("com.mysql.jdbc.Driver");
            } catch (ClassNotFoundException e) {
                // MySQL Connector/J jar missing from the classpath.
                e.printStackTrace();
                System.exit(1);
            }
            // Fix: the Connector/J parameter is "createDatabaseIfNotExist" (lowercase b)
            // and requires an explicit value; the original misspelled, valueless form
            // was silently ignored by the driver.
            connection2mysql = DriverManager.getConnection(
                    "jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true",
                    "hive", "hive");
            System.out.println("to mysql: " + connection2mysql.toString());
        }
        return connection2mysql;
    }

    /**
     * Closes the cached Hive connection, if any. The cached reference is reset
     * so a later getHiveConnection() creates a fresh connection instead of
     * returning a closed one.
     *
     * @throws SQLException if closing fails
     */
    public static void closeHive() throws SQLException {
        if (connection != null) {
            connection.close();
            connection = null;
        }
    }

    /**
     * Closes the cached MySQL connection, if any, and resets the cache so the
     * connection can be re-created later.
     *
     * @throws SQLException if closing fails
     */
    public static void closeMysql() throws SQLException {
        if (connection2mysql != null) {
            connection2mysql.close();
            connection2mysql = null;
        }
    }
}
package com.scutcv.mrshen;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * Helper methods for querying Hive and copying the results into MySQL.
 * All methods obtain their connections from ConnectDB.
 */
public class hiveUtil {

    /**
     * Runs a DDL (CREATE TABLE) or LOAD DATA statement against Hive.
     * (The method name keeps its original spelling for compatibility with
     * existing callers.)
     *
     * @param hql the Hive DDL/LOAD statement to execute
     * @throws SQLException if execution fails
     */
    public static void createTablseOrloadData(String hql) throws SQLException {
        Connection connection = ConnectDB.getHiveConnection();
        Statement statement = connection.createStatement();
        try {
            // Fix: DDL/LOAD statements produce no result set; executeQuery() is
            // spec-violating for them. execute() handles both cases.
            statement.execute(hql);
        } finally {
            statement.close();
        }
    }

    /**
     * Runs a SELECT against Hive and returns the result set.
     * The caller owns the returned ResultSet; closing the Hive connection
     * (ConnectDB.closeHive()) releases it.
     *
     * @param hql the Hive query to execute
     * @return the query result
     * @throws SQLException if execution fails
     */
    public static ResultSet queryHive(String hql) throws SQLException {
        Connection connection = ConnectDB.getHiveConnection();
        // Statement intentionally not closed here: closing it would also close
        // the ResultSet we hand back to the caller.
        Statement statement = connection.createStatement();
        ResultSet resultSet = statement.executeQuery(hql);
        return resultSet;
    }

    /**
     * Copies a (key, value) Hive result set into the MySQL table KVtest,
     * one row per result row.
     *
     * @param resultSet a Hive result whose column 1 is an int key and column 2 a string value
     * @throws SQLException if reading or inserting fails
     */
    public static void hive2Mysql(ResultSet resultSet) throws SQLException {
        Connection connection = ConnectDB.getMysqlConnection();
        // Fix: use a PreparedStatement instead of string-concatenated SQL —
        // the original broke on values containing quotes and was open to SQL injection.
        PreparedStatement insert =
                connection.prepareStatement("insert into KVtest values(?,?)");
        try {
            while (resultSet.next()) {
                // JDBC result-set columns are 1-based, not 0-based.
                int key = resultSet.getInt(1);
                String val = resultSet.getString(2);
                insert.setInt(1, key);
                insert.setString(2, val);
                insert.executeUpdate();
            }
        } finally {
            insert.close();
        }
    }

    /**
     * Builds an INSERT statement for the dmining table from one row of ad data.
     *
     * @return the SQL INSERT string with string values quoted
     */
    public static String getSQLString(int cid,
            String time,
            int type,
            String adv,
            int aid,
            String country,
            String prov,
            String city,
            String snp,
            String ptype,
            String phone) {
        // Fix: string values must be quoted (and embedded quotes escaped) to
        // produce valid SQL; the original emitted them bare.
        String sql = "insert into dmining values(" + cid + "," + quote(time) + ","
                + type + "," + quote(adv) + "," + aid + ","
                + quote(country) + "," + quote(prov) + "," + quote(city) + ","
                + quote(snp) + "," + quote(ptype) + "," + quote(phone) + ")";
        return sql;
    }

    /** Wraps a value in single quotes, doubling any embedded single quotes. */
    private static String quote(String value) {
        if (value == null) {
            return "NULL";
        }
        return "'" + value.replace("'", "''") + "'";
    }

    /**
     * Prints each row of an 11-column ad-data result set to stdout.
     * Useful when the MySQL side garbles non-ASCII data.
     *
     * @param resultSet the Hive query result to print
     * @throws SQLException if reading fails
     */
    public static void printQueryResult(ResultSet resultSet) throws SQLException {
        while (resultSet.next()) {
            // Columns are 1-based; reading column 0 throws an out-of-range error.
            int cid = resultSet.getInt(1);
            int type = resultSet.getInt(3);
            int aid = resultSet.getInt(5);
            String time = resultSet.getString(2);
            String adv = resultSet.getString(4);
            String country = resultSet.getString(6);
            String prov = resultSet.getString(7);
            String city = resultSet.getString(8);
            String snp = resultSet.getString(9);
            String ptype = resultSet.getString(10);
            String phone = resultSet.getString(11);
            System.out.println("cid: " + cid);
            System.out.println("time: " + time);
            System.out.println("ad_type: " + type);
            System.out.println("advertisment: " + adv);
            System.out.println("aid: " + aid);
            System.out.println("country: " + country);
            System.out.println("province: " + prov);
            System.out.println("city: " + city);
            System.out.println("service network provider: " + snp);
            System.out.println("phone_type: " + ptype);
            System.out.println("phone: " + phone + '\n');
        }
    }
}
其中:
1)由于创建表格和导入数据的逻辑步骤完全一样,只是执行的语句有区别,因此我把它们合并写成一个方法createTablseOrloadData。
2)queryHive方法返回hive的查询结果
3)hive2Mysql方法将方法2)中的结果通过迭代读取并插入mysql中的数据库。由于一开始我建立的表格要插入的数据含有中文导致乱码,因为只是测试,所以新加了printQueryResult方法将查询结果简单打印出来。为了节约时间我在hive中新建了表格kv2,并将hive的测试用例导入,因此注释掉的部分忽略不计。值得注意的是,在返回的查询结果resultSet中,取数据的时候下标是从1而不是从0开始的,一开始我从0读取,结果报了数据越界的异常。如下图。
package com.scutcv.mrshen;
import java.sql.ResultSet;
import java.sql.SQLException;
/**
 * Driver: queries the Hive table kv2 and copies its rows into MySQL.
 * The Hive server must be running on the host/port configured in ConnectDB.
 */
public class execHive {
    public static void main(String[] args) throws SQLException {
        try {
            ResultSet resultSet = hiveUtil.queryHive("select * from kv2");
            hiveUtil.hive2Mysql(resultSet);
            // Alternative actions, enable as needed:
            // hiveUtil.printQueryResult(resultSet);
            // String createHql = "create table kv2(id INT, name string)";
            // hiveUtil.createTablseOrloadData(createHql);
            // String loadHql = "load data local inpath '/home/hduser/hive-0.13.1/examples/files/kv2.txt' overwrite into table kv2";
            // hiveUtil.createTablseOrloadData(loadHql);
        } finally {
            // Fix: close both connections even when the query or transfer throws,
            // so server-side resources are not leaked.
            ConnectDB.closeHive();
            ConnectDB.closeMysql();
        }
    }
}
上述是将hive的查询数据写入mysql的测试,另外注释掉的还有在hive中创建kv2,导入数据等。按需自取。在运行execHive之前必须将hive服务开起来,指定的端口号要和ConnectDB中获取hive连接的配置一致。
参考:实战Hadoop——开启通向云计算的捷径_刘鹏