HBase批量导入数据,支持多线程同时操作


/**
 * HBase helper: buffers puts per thread and submits them to HBase in batches.
 *
 * <p>Each calling thread accumulates {@code Put}s in its own thread-local list. Once that list
 * reaches {@code ServerConfigs.CACHE_LIST_SIZE} entries it is written to the table in a single
 * call. A thread that is done writing must call {@link #flush(String)} so that a trailing,
 * partially filled batch is not left behind in the buffer.
 *
 * @Auther: ning.zhang
 * @Email: ning.zhang@phicomm.com
 * @CreateDate: 2018/7/30
 */
public class HBaseUtils {

    /** Per-thread buffer of puts that have not been submitted yet. */
    ThreadLocal<List<Put>> threadLocal = new ThreadLocal<List<Put>>();
    HBaseAdmin admin = null;
    /** Connection shared by all tables handed out by this instance. */
    Connection conn = null;

    /**
     * Opens the shared HBase connection.
     *
     * @throws IllegalStateException if the connection cannot be established; failing here
     *                               avoids a later NullPointerException on a null connection
     */
    private HBaseUtils() {
        Configuration configuration = new Configuration();
        configuration.set("hbase.zookeeper.quorum", ServerConfigs.ZK);
        configuration.set("hbase.rootdir", "hdfs://hadoop-23:8020/hbase");

        try {
            conn = ConnectionFactory.createConnection(configuration);
            // Obtain the admin from the shared connection instead of the deprecated
            // HBaseAdmin(Configuration) constructor, which opens a second connection.
            admin = (HBaseAdmin) conn.getAdmin();
        } catch (IOException e) {
            throw new IllegalStateException(
                    "Failed to connect to HBase using ZooKeeper quorum " + ServerConfigs.ZK, e);
        }
    }

    private static HBaseUtils instance = null;

    /**
     * Returns the process-wide instance, creating it on first use.
     */
    public static synchronized HBaseUtils getInstance() {
        if (null == instance) {
            instance = new HBaseUtils();
        }
        return instance;
    }


    /**
     * Gets an HTable instance for the given table name.
     *
     * <p>The caller is responsible for closing the returned table.
     *
     * @param tableName HBase table name
     * @return the table, or {@code null} if it could not be obtained
     */
    public HTable getTable(String tableName) {

        HTable table = null;
        try {
            final TableName tname = TableName.valueOf(tableName);
            table = (HTable) conn.getTable(tname);

        } catch (IOException e) {
            e.printStackTrace();
        }

        return table;
    }

    /**
     * Buffers one record for batch submission. Within one thread, all buffered records must
     * target the same table, because the buffer is per thread and not per table.
     *
     * <p>When the calling thread's buffer reaches {@code ServerConfigs.CACHE_LIST_SIZE}
     * entries, the whole buffer is written to {@code tableName}.
     *
     * @param tableName HBase table name
     * @param rowkey    row key of the record
     * @param cf        column family
     * @param column    column qualifier
     * @param value     value to write
     * @throws IllegalStateException if the buffer is full and the table cannot be obtained
     */
    public void bulkput(String tableName, String rowkey, String cf, String column, String value) {
        List<Put> list = threadLocal.get();
        if (list == null) {
            list = new ArrayList<Put>();
            threadLocal.set(list);
        }
        Put put = new Put(Bytes.toBytes(rowkey));
        put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value));
        list.add(put);
        if (list.size() >= ServerConfigs.CACHE_LIST_SIZE) {
            flush(tableName);
        }
    }

    /**
     * Writes every record buffered by the calling thread to the given table and empties the
     * buffer. Does nothing when the calling thread has no buffered records.
     *
     * <p>If the write fails with an IOException the buffer is kept, so the records are
     * submitted again by the next flush.
     *
     * @param tableName HBase table name
     * @throws IllegalStateException if the table cannot be obtained
     */
    public void flush(String tableName) {
        List<Put> list = threadLocal.get();
        if (list == null || list.isEmpty()) {
            return;
        }
        // try-with-resources releases the table handle after every batch.
        try (HTable table = getTable(tableName)) {
            if (table == null) {
                throw new IllegalStateException("Could not obtain HBase table " + tableName);
            }
            table.put(list);
            list.clear();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes a single record to an HBase table immediately, bypassing the batch buffer.
     *
     * @param tableName HBase table name
     * @param rowkey    row key of the record
     * @param cf        column family
     * @param column    column qualifier
     * @param value     value to write
     * @throws IllegalStateException if the table cannot be obtained
     */
    public void put(String tableName, String rowkey, String cf, String column, String value) {

        Put put = new Put(Bytes.toBytes(rowkey));
        put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value));
        try (HTable table = getTable(tableName)) {
            if (table == null) {
                throw new IllegalStateException("Could not obtain HBase table " + tableName);
            }
            table.put(put);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Manual benchmark: writes 200,000 records to table "t1" from two threads and prints the
     * elapsed time in milliseconds.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        long start = System.currentTimeMillis();

        final String tableName = "t1";

        // Start the worker before the main-thread loop so both halves really run concurrently.
        Thread worker = new Thread(new Runnable() {
            @Override
            public void run() {
                HBaseUtils writer = HBaseUtils.getInstance();
                for (int i = 100000; i < 200000; i++) {
                    writer.bulkput(tableName, i + "", "f1", "id", String.valueOf(100321 + i));
                }
                writer.flush(tableName);
            }
        });
        worker.start();

        HBaseUtils utils = HBaseUtils.getInstance();
        for (int i = 0; i < 100000; i++) {
            utils.bulkput(tableName, i + "", "f1", "id", String.valueOf(100321 + i));
        }
        utils.flush(tableName);

        // Wait for the worker; otherwise the printed time excludes its writes.
        try {
            worker.join();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }

        System.out.println(System.currentTimeMillis() - start);
    }

}

其中ServerConfigs配置如下:

/**
 * Static configuration constants.
 *
 * @Auther: ning.zhang
 * @Email:  ning.zhang@phicomm.com
 * @CreateDate: 2018/7/30
 */
public class ServerConfigs {

    /** Number of records submitted per batch. */
    public static final int CACHE_LIST_SIZE = 100;

    /** Comma-separated ZooKeeper host:port list. */
    public static final String ZK =
            "172.17.245.23:2181,172.17.245.25:2181,172.17.245.26:2181";

    // NOTE(review): the three values below look like Kafka consumer settings
    // (broker list, topic, consumer group) — confirm against their callers.
    public static final String BROKER_LIST = "172.17.245.23:9092";

    public static final String TOPIC = "ad_upload_event";

    public static final String GROUP_ID = "test_group";

}

测试结果:20万条数据,双线程插入HBase,耗时5.32s

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值