向HBase中导入数据1:查询Hive后写入

一、前提条件

Hive表已经创建好,并且远程访问Hive的代码已写好。

Hive表结构如下:

create external table clientdata(screen string, model string, userID string, country string, province string, city string, network string, time string) row format delimited fields terminated by '|' location '/clientdata';

查询Hive的Dao如下:

package cn.edu.shu.ces.chenjie.tianyi.hive.dao.impl;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;

import cn.edu.shu.ces.chenjie.tianyi.hive.model.ClientData;
import cn.edu.shu.ces.chenjie.tianyi.hive.utils.JDBCUtils;

/**
 * Static DAO that queries the Hive table {@code clientdata} through JDBC and maps
 * every row to a {@link ClientData} object.
 *
 * <p>All public query methods catch every exception, print its stack trace and
 * return an empty list. Callers therefore cannot tell "no rows" apart from
 * "query failed".
 */
public class ClientDataDaoHiveImpl
{
	/** Pattern used to parse the textual {@code time} column. */
	private static final String TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

	/**
	 * Drains a result set into a list of {@link ClientData}.
	 *
	 * @param rs result set exposing the columns screen, model, userID, country,
	 *           province, city, network and time
	 * @return one element per row, in result-set order
	 * @throws SQLException   if reading from the result set fails
	 * @throws ParseException if a {@code time} value does not match {@link #TIME_PATTERN}
	 */
	private static List<ClientData> resultSetToList(ResultSet rs) throws SQLException, ParseException
	{
		List<ClientData> list = new ArrayList<ClientData>();
		// Built once per call instead of once per row. It is deliberately not a
		// static field because SimpleDateFormat is not safe for concurrent use.
		SimpleDateFormat formatter = new SimpleDateFormat(TIME_PATTERN);
		while (rs.next())
		{
			// Columns are read in the same order as the table definition.
			String screen = rs.getString("screen");
			String model = rs.getString("model");
			String userID = rs.getString("userID");
			String country = rs.getString("country");
			String province = rs.getString("province");
			String city = rs.getString("city");
			String network = rs.getString("network");
			String time = rs.getString("time");

			ClientData clientData = new ClientData();
			clientData.setCity(city);
			clientData.setCountry(country);
			clientData.setModel(model);
			clientData.setNetwork(network);
			clientData.setProvince(province);
			clientData.setScreen(screen);
			clientData.setTime(formatter.parse(time));
			clientData.setUserID(userID);
			list.add(clientData);
			// NOTE(review): prints every row; costly for large pages. Kept because
			// main() relies on it as its only visible output.
			System.out.println(clientData);
		}
		return list;
	}

	/**
	 * Runs one query and maps its rows, sharing the connection handling that the
	 * public methods used to duplicate.
	 *
	 * <p>Without parameters the query runs on a plain {@link Statement}; with
	 * parameters it runs on a {@link PreparedStatement}, binding {@link Integer}
	 * values with {@code setInt} and everything else with {@code setString}.
	 *
	 * @param sql    HQL text, with one {@code ?} per entry of {@code params}
	 * @param params values to bind, in placeholder order; only Integer and String
	 *               (or null, bound as a String) are supported
	 * @return the mapped rows, or an empty list if anything failed
	 */
	private static List<ClientData> query(String sql, Object... params)
	{
		List<ClientData> list = new ArrayList<ClientData>();
		Connection conn = null;
		Statement st = null;
		ResultSet rs = null;
		try
		{
			conn = JDBCUtils.getConnection();
			if (params.length == 0)
			{
				st = conn.createStatement();
				rs = st.executeQuery(sql);
			}
			else
			{
				PreparedStatement ps = conn.prepareStatement(sql);
				// Assigned before binding so the finally block releases it even
				// when binding or execution throws.
				st = ps;
				for (int i = 0; i < params.length; i++)
				{
					Object param = params[i];
					if (param instanceof Integer)
					{
						ps.setInt(i + 1, ((Integer) param).intValue());
					}
					else
					{
						ps.setString(i + 1, (String) param);
					}
				}
				rs = ps.executeQuery();
			}
			list = resultSetToList(rs);
		}
		catch (Exception e)
		{
			// Best effort: report the failure and fall through with the empty list.
			e.printStackTrace();
		}
		finally
		{
			JDBCUtils.release(conn, st, rs);
		}
		return list;
	}

	/**
	 * Returns every row of {@code clientdata}.
	 *
	 * @return all rows, or an empty list if the query failed
	 */
	public static List<ClientData> list()
	{
		return query("select * from clientdata");
	}

	/**
	 * Returns one page of {@code clientdata}.
	 *
	 * <p>NOTE(review): the query has no ORDER BY, so Hive presumably does not
	 * guarantee that consecutive pages are disjoint and complete - confirm before
	 * relying on this for a full export.
	 *
	 * @param page     1-based page number; the row offset is {@code (page - 1) * pageSize}
	 * @param pageSize maximum number of rows in the page
	 * @return the rows of that page, or an empty list if the page is past the end
	 *         or the query failed
	 */
	public static List<ClientData> list(int page, int pageSize)
	{
		return query("select * from clientdata limit ?,?", (page - 1) * pageSize, pageSize);
	}

	/**
	 * Returns the rows whose {@code userID} column equals the given value.
	 *
	 * @param userID value to match against the {@code userID} column
	 * @return the matching rows, or an empty list if none matched or the query failed
	 */
	public static List<ClientData> listByID(String userID)
	{
		// TODO (from the original author): switch to the clientdata_part table,
		// i.e. "select * from clientdata_part where userID = ?".
		return query("select * from clientdata where userID = ?", userID);
	}

	/**
	 * Manual smoke test: dumps the whole table to standard output.
	 *
	 * @param args unused
	 */
	public static void main(String[] args)
	{
		list();
		// Other manual checks: list(1, 10) or
		// listByID("919a6ea2e85301b7bcbe099be5ace41f").
	}

}


二、使用Java操作HBase的代码已写好

使用Java访问远程HBase数据库


三、查询Hive后使用JavaAPI向HBase中写入数据

package cn.edu.shu.ces.chenjie.tianyi.hbase.hive2hbase;

import java.io.IOException;
import java.util.List;

import cn.edu.shu.ces.chenjie.tianyi.hbase.dao.impl.ClientDataDaoHBaseImpl;
import cn.edu.shu.ces.chenjie.tianyi.hive.model.ClientData;
import cn.edu.shu.ces.chenjie.tianyi.hive.dao.impl.ClientDataDaoHiveImpl;
/**
 * Copies the Hive table behind {@link ClientDataDaoHiveImpl} into HBase one page
 * at a time.
 */
public class ClientDataFromHive2Base
{
	/** Number of rows requested from Hive per page. */
	private static final int PAGE_SIZE = 100000;

	/**
	 * Creates the HBase table, then pages through Hive and saves each page until
	 * an empty page is returned.
	 *
	 * <p>NOTE(review): {@code ClientDataDaoHiveImpl.list(int, int)} also returns an
	 * empty list when the query fails, so a failure ends the import the same way
	 * as reaching the end of the table.
	 *
	 * @param args unused
	 * @throws IOException declared by the original signature; presumably raised by
	 *                     the HBase DAO calls - confirm
	 */
	public static void main(String[] args) throws IOException
	{
		ClientDataDaoHBaseImpl.createTable();
		int page = 1;
		while (true)
		{
			List<ClientData> cds = ClientDataDaoHiveImpl.list(page, PAGE_SIZE);
			if (cds.isEmpty())
			{
				// An empty page is the termination signal.
				break;
			}
			// The message shows the upper bound of rows fetched so far. It is derived
			// from PAGE_SIZE so it cannot drift from the query, and computed in long
			// so it cannot overflow on large tables.
			System.out.println("查询完毕,正在保存-------------------------------------->" + (long) page * PAGE_SIZE);
			ClientDataDaoHBaseImpl.saveList(cds);
			page++;
		}
	}

}


四、问题分析:

若Hive与HBase部署在同一个集群上,Hive查询与HBase写入会同时占用该集群的资源,可能造成大量资源消耗。


  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值