一、前提条件
Hive表已经创建好,并且远程访问Hive的代码已写好。
Hive表结构如下:
create external table clientdata(screen string, model string, userID string, country string, province string, city string, network string, time string) row format delimited fields terminated by '|' location '/clientdata';
查询Hive的Dao如下:
package cn.edu.shu.ces.chenjie.tianyi.hive.dao.impl;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import cn.edu.shu.ces.chenjie.tianyi.hive.model.ClientData;
import cn.edu.shu.ces.chenjie.tianyi.hive.utils.JDBCUtils;
public class ClientDataDaoHiveImpl
{
    /** Timestamp pattern used by the Hive table's "time" column. */
    private static final String TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Static utility class — not meant to be instantiated. */
    private ClientDataDaoHiveImpl()
    {
    }

    /**
     * Materializes a JDBC {@link ResultSet} from the clientdata table into a
     * list of {@link ClientData} objects.
     *
     * @param rs open result set positioned before the first row; the caller
     *           is responsible for closing it
     * @return all remaining rows, possibly empty
     * @throws SQLException   if reading a column fails
     * @throws ParseException if a "time" value does not match {@code yyyy-MM-dd HH:mm:ss}
     */
    private static List<ClientData> resultSetToList(ResultSet rs) throws SQLException, ParseException
    {
        List<ClientData> list = new ArrayList<ClientData>();
        // Create the formatter once per call, not once per row (it was previously
        // allocated inside the loop). SimpleDateFormat is NOT thread-safe, so it
        // must stay method-local rather than becoming a static field.
        SimpleDateFormat formatter = new SimpleDateFormat(TIME_PATTERN);
        while (rs.next())
        {
            ClientData clientData = new ClientData();
            clientData.setScreen(rs.getString("screen"));
            clientData.setModel(rs.getString("model"));
            clientData.setUserID(rs.getString("userID"));
            clientData.setCountry(rs.getString("country"));
            clientData.setProvince(rs.getString("province"));
            clientData.setCity(rs.getString("city"));
            clientData.setNetwork(rs.getString("network"));
            clientData.setTime(formatter.parse(rs.getString("time")));
            list.add(clientData);
            System.out.println(clientData);
        }
        return list;
    }

    /**
     * Returns every row of the clientdata table.
     * <p>On any error the exception is logged and an empty list is returned.
     *
     * @return all rows, or an empty list on failure
     */
    public static List<ClientData> list()
    {
        List<ClientData> list = new ArrayList<ClientData>();
        Connection conn = null;
        Statement st = null;
        ResultSet rs = null;
        String sql = "select * from clientdata";
        try
        {
            // Obtain a connection
            conn = JDBCUtils.getConnection();
            // Create the statement
            st = conn.createStatement();
            // Run the HQL
            rs = st.executeQuery(sql);
            // Convert the rows
            list = resultSetToList(rs);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            JDBCUtils.release(conn, st, rs);
        }
        return list;
    }

    /**
     * Returns one page of the clientdata table.
     *
     * @param page     1-based page number
     * @param pageSize number of rows per page
     * @return the requested page, or an empty list on failure / past the end
     */
    public static List<ClientData> list(int page, int pageSize)
    {
        List<ClientData> list = new ArrayList<ClientData>();
        Connection conn = null;
        PreparedStatement ps = null;
        ResultSet rs = null;
        // NOTE(review): "limit offset, rows" requires Hive 2.0+ — confirm the
        // target HiveServer2 version supports this syntax.
        String sql = "select * from clientdata limit ?,?";
        try
        {
            // Obtain a connection
            conn = JDBCUtils.getConnection();
            // Create the statement; placeholders keep the query parameterized
            ps = conn.prepareStatement(sql);
            ps.setInt(1, (page - 1) * pageSize);
            ps.setInt(2, pageSize);
            // Run the HQL
            rs = ps.executeQuery();
            // Convert the rows
            list = resultSetToList(rs);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            JDBCUtils.release(conn, ps, rs);
        }
        return list;
    }

    /**
     * Returns every row belonging to one user.
     *
     * @param userID user identifier to filter on
     * @return matching rows, or an empty list on failure
     */
    public static List<ClientData> listByID(String userID)
    {
        List<ClientData> list = new ArrayList<ClientData>();
        Connection conn = null;
        PreparedStatement ps = null;
        ResultSet rs = null;
        // TODO: switch to the partitioned table for faster lookups:
        // String sql = "select * from clientdata_part where userID = ?";
        String sql = "select * from clientdata where userID = ?";
        try
        {
            // Obtain a connection
            conn = JDBCUtils.getConnection();
            // Create the statement; the ? placeholder avoids HQL injection
            ps = conn.prepareStatement(sql);
            ps.setString(1, userID);
            // Run the HQL
            rs = ps.executeQuery();
            // Convert the rows
            list = resultSetToList(rs);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            JDBCUtils.release(conn, ps, rs);
        }
        return list;
    }

    /**
     * Ad-hoc smoke test: run one of the queries against the live Hive server.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        list();
        //list(1, 10);
        //listByID("919a6ea2e85301b7bcbe099be5ace41f");
    }
}
二、使用Java操作HBase的操作已写好
三、查询Hive后使用Java API向HBase中写入数据
package cn.edu.shu.ces.chenjie.tianyi.hbase.hive2hbase;
import java.io.IOException;
import java.util.List;
import cn.edu.shu.ces.chenjie.tianyi.hbase.dao.impl.ClientDataDaoHBaseImpl;
import cn.edu.shu.ces.chenjie.tianyi.hive.model.ClientData;
import cn.edu.shu.ces.chenjie.tianyi.hive.dao.impl.ClientDataDaoHiveImpl;
public class ClientDataFromHive2Base
{
    /** Rows fetched from Hive per page; also used to report progress. */
    private static final int PAGE_SIZE = 100000;

    /**
     * Copies the whole Hive clientdata table into HBase, one page at a time.
     *
     * @param args unused
     * @throws IOException if the HBase operations fail
     */
    public static void main(String[] args) throws IOException
    {
        ClientDataDaoHBaseImpl.createTable();  // create the HBase table
        int page = 1;
        while (true)
        {
            // Fetch the next page from Hive
            List<ClientData> cds = ClientDataDaoHiveImpl.list(page, PAGE_SIZE);
            // An empty page means the Hive table is exhausted — stop
            if (cds.isEmpty())
                break;
            // Bug fix: the progress count previously printed i * 1000000 while the
            // page size is 100000 (10x too large). Cast to long to avoid int
            // overflow once the row count passes Integer.MAX_VALUE.
            System.out.println("查询完毕,正在保存-------------------------------------->" + (long) page * PAGE_SIZE);
            ClientDataDaoHBaseImpl.saveList(cds);  // persist this page
            page++;  // next page
        }
    }
}
四、问题分析:
Hive与HBase若部署在同一个集群上,这种大批量的分页查询与写入可能在同一批机器上造成大量资源消耗,相互争抢CPU、内存与磁盘IO。