HBase表设计----预分区和散列存储

最新推荐文章于 2022-11-22 18:53:16 发布

huangyueranbbc

最新推荐文章于 2022-11-22 18:53:16 发布

阅读量776

点赞数 2

分类专栏：大数据 hbase 文章标签： hbase hash散列大数据预分区

本文链接：https://blog.csdn.net/huangyueranbbc/article/details/84326400

版权

大数据同时被 2 个专栏收录

34 篇文章 1 订阅

订阅专栏

hbase

2 篇文章 0 订阅

订阅专栏

hbase设计存在一个常见的问题便是HBase对于row的不均衡分布，它们被存储在一个唯一的rowkey区间中，被称为region，区间的范围被称为Start Key和End Key。

热门数据key连续，导致热门数据被分到同一个region中，即同一个服务器节点中，会导致该节点的压力远大于其他服务器节点的压力，数据倾斜。

解决思路很简单：散列存储+预分区

散列可以采用hash散列，作为前缀，来让数据尽可能均衡的分布到不通的region中，然后对region进行预分区即可。

生成hash前缀的方法：

byte prefix =
(byte) (Long.hashCode(timestamp) % <number of regionservers>);
byte[] rowkey =
Bytes.add(Bytes.toBytes(prefix), Bytes.toBytes(timestamp);

Java实现代码：

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;

/**
 * @author huangyueran
 */
public class HBasePrePartition {

    // HBase 基本 API
    private Connection connection; // HTablePool被弃用 因为线程不安全。使用Connection代替，只要保证全局是同一个Connection就可以。
    private Admin admin;
    private Table table;

    private final Logger log = LoggerFactory.getLogger(HBasePrePartition.class);

    TableName tableName = TableName.valueOf("phone_2018812");

    @Before
    public void begin() throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
        Configuration conf = new Configuration();
        // 指定 Zookeeper集群
        // conf.set("hbase.zookeeper.quorum", "192.168.0.193:2181");

        conf.set("hbase.zookeeper.quorum", "192.168.0.193");

        connection = ConnectionFactory.createConnection(conf);

        table = connection.getTable(tableName);

        admin = connection.getAdmin();
        System.out.println("admin:"+admin);
    }

    @After
    public void end() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (table != null) {
                table.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

    }

    /**
     * @throws IOException
     * @category 创建表
     */
    @Test
    public void createTable() throws IOException {
        // 判断表是否存在 如果存在 删除表
        if (admin.tableExists(tableName)) {
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
        }

        HTableDescriptor desc = new HTableDescriptor(tableName); // 创建表描述

        // 创建列族 1-3个列族最佳
        HColumnDescriptor family = new HColumnDescriptor("cf1");
        family.setBlockCacheEnabled(true); // 打开读缓存
        family.setInMemory(true); // 打开写缓存
        family.setMaxVersions(1); // 最大版本数

        desc.addFamily(family);

        admin.createTable(desc);
    }

    /**
     * 预分区 创建表
     */
    @Test
    public void createTableBySplitKeys() {
        try {
            TableName tableName = TableName.valueOf("testyufenqu");
            List<String> columnFamily = new ArrayList();
            columnFamily.add("info");

            if (admin.tableExists(tableName)) {
                System.out.println("table has exist!" + tableName);
            } else {
                HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
                for (String cf : columnFamily) {
                    tableDescriptor.addFamily(new HColumnDescriptor(cf));
                }
                byte[][] splitKeys = getSplitKeys();
                admin.createTable(tableDescriptor, splitKeys);//指定splitkeys
                System.out.println("===Create Table " + tableName
                        + " Success!columnFamily:" + columnFamily.toString()
                        + "===");
            }
        } catch (MasterNotRunningException e) {
            e.printStackTrace();
        } catch (ZooKeeperConnectionException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * @throws IOException
     * @category 预分区插入数据 异步
     */
    @Test
    public void asyncInsertByRegion() throws IOException {
        long startTime = System.currentTimeMillis();
        TableName tableName = TableName.valueOf("testyufenqu");

        // 批量插入异常监听
        final BufferedMutator.ExceptionListener listener = new BufferedMutator.ExceptionListener() {
            public void onException(RetriesExhaustedWithDetailsException e, BufferedMutator mutator) {
                for (int i = 0; i < e.getNumExceptions(); i++) {
                    System.out.println("Failed to sent put " + e.getRow(i) + ".");
                }
            }
        };
        BufferedMutatorParams params = new BufferedMutatorParams(tableName)
                .listener(listener);
        params.writeBufferSize(1024*1024);
        final BufferedMutator mutator = connection.getBufferedMutator(params);


        try {
            mutator.mutate(batchPut());
            mutator.flush();
        } finally {
            mutator.close();
        }
        log.info("cost time:{}ms", System.currentTimeMillis() - startTime);
    }

    /**
     * @throws IOException
     * @category 预分区插入数据
     */
    @Test
    public void insertByRegion() throws IOException {
        long startTime = System.currentTimeMillis();
        TableName tableName = TableName.valueOf("testyufenqu");
        Table table1 = connection.getTable(tableName);
        table1.put(batchPut());
        log.info("cost time:{}ms", System.currentTimeMillis() - startTime);
    }

    private static List<Put> batchPut() {
        List<Put> list = new ArrayList<Put>();
        for (int i = 1; i <= 10000; i++) {
            byte[] rowkey = Bytes.toBytes(getRandomNumber() + "-" + System.currentTimeMillis() + "-" + i);
            Put put = new Put(rowkey);
            put.add(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("zs" + i));
            list.add(put);
        }
        return list;
    }

    private static String getRandomNumber() {
        String ranStr = Math.random() + "";
        int pointIndex = ranStr.indexOf(".");
        return ranStr.substring(pointIndex + 1, pointIndex + 3);
    }

    private byte[][] getSplitKeys() {
        String[] keys = new String[]{"10|", "20|", "30|", "40|", "50|",
                "60|", "70|", "80|", "90|"};
        byte[][] splitKeys = new byte[keys.length][];
        TreeSet<byte[]> rows = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);//升序排序
        for (int i = 0; i < keys.length; i++) {
            rows.add(Bytes.toBytes(keys[i]));
        }
        Iterator<byte[]> rowKeyIter = rows.iterator();
        int i = 0;
        while (rowKeyIter.hasNext()) {
            byte[] tempRow = rowKeyIter.next();
            rowKeyIter.remove();
            splitKeys[i] = tempRow;
            i++;
        }
        return splitKeys;
    }

    /**
     * @throws IOException
     * @category 插入数据
     */
    @Test
    public void insert() throws IOException {
        byte[] rowkey = ("18286352451_" + System.currentTimeMillis()).getBytes();
        Put puts = new Put(rowkey);
        puts.addColumn("cf1".getBytes(), "dest".getBytes(), "13805557773".getBytes());
        puts.addColumn("cf1".getBytes(), "type".getBytes(), "1".getBytes());
        puts.addColumn("cf1".getBytes(), "time".getBytes(), "2015-09-09 16:55:29".getBytes());
        table.put(puts);
    }


    /**
     * 异步往指定表添加数据 批量插入
     *
     * @return long                返回执行时间
     * @throws IOException
     */
    @Test
    public void put() throws Exception {
        String tablename = "t_cdr";

        for(int i=0;i<10000000;i++){
            List<Put> puts = getPuts();
            // 批量插入异常监听
            final BufferedMutator.ExceptionListener listener = new BufferedMutator.ExceptionListener() {
                public void onException(RetriesExhaustedWithDetailsException e, BufferedMutator mutator) {
                    for (int i = 0; i < e.getNumExceptions(); i++) {
                        System.out.println("Failed to sent put " + e.getRow(i) + ".");
                    }
                }

            };
            BufferedMutatorParams params = new BufferedMutatorParams(TableName.valueOf(tablename))
                    .listener(listener);
            params.writeBufferSize(5 * 1024 * 1024);

            final BufferedMutator mutator = connection.getBufferedMutator(params);
            try {
                mutator.mutate(puts);
                mutator.flush();
            } finally {
                mutator.close();
            }
        }

    }

    private List<Put> getPuts() {
        List<Put> puts = new ArrayList<Put>(); // 装入集合一起插入记录

        for (int i = 0; i < 10; i++) {
            String rowkey;
            String phoneNum = getPhoneNum("183");

            // 100条通话记录
            for (int j = 0; j < 100; j++) {
                String phoneDate = getData("2016");
                SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
                try {
                    long dateLong = sdf.parse(phoneDate).getTime();
                    // 降序
                    rowkey = phoneNum + (Long.MAX_VALUE - dateLong);

                    Put put = new Put(rowkey.getBytes());
                    put.add("cf1".getBytes(), "type".getBytes(), (new Random().nextInt(2) + "").getBytes());
                    put.add("cf1".getBytes(), "time".getBytes(), phoneDate.getBytes());
                    put.add("cf1".getBytes(), "dest".getBytes(), getPhoneNum("159").getBytes());

                    puts.add(put);
                } catch (ParseException e) {
                    e.printStackTrace();
                }

            }
        }

        return puts;
    }

    /**
     * 随机生成手机号
     *
     * @param prefix
     * @return
     */
    private String getPhoneNum(String prefix) {
        return prefix + String.format("%08d", new Random().nextInt(99999999));
    }

    /**
     * 随机生成时间
     *
     * @param year
     * @return
     */
    private String getData(String year) {
        Random r = new Random();
        return year + String.format("%02d%02d%02d%02d%02d",
                new Object[]{r.nextInt(12) + 1, r.nextInt(28) + 1, r.nextInt(60), r.nextInt(60), r.nextInt(60)});
    }


    /**
     * @throws IOException
     * @category 查询
     */
    @Test
    public void get() throws IOException {
        String rowkey = "18286352451_1499228897396";
        Get get = new Get(rowkey.getBytes());
        // 搜索筛选查询
        get.addColumn("cf1".getBytes(), "type".getBytes());
        get.addColumn("cf1".getBytes(), "dest".getBytes());
        get.addColumn("cf1".getBytes(), "time".getBytes());

        Result result = table.get(get);

        Cell c1 = result.getColumnLatestCell("cf1".getBytes(), "type".getBytes());
        System.out.println(new String(c1.getValue()));
        Cell c2 = result.getColumnLatestCell("cf1".getBytes(), "dest".getBytes());
        System.out.println(new String(c2.getValue()));
        Cell c3 = result.getColumnLatestCell("cf1".getBytes(), "time".getBytes());
        System.out.println(new String(c3.getValue()));

    }

}

pom.xml文件:

<dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.0</version>
        </dependency>
        <!-- junit -->
        <!-- https://mvnrepository.com/artifact/junit/junit -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.2</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>RELEASE</version>
        </dependency>
        <!-- junit -->
        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.4</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/log4j/log4j -->
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.1.9</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-common -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>1.1.9</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-server -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.1.9</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase</artifactId>
            <version>1.1.9</version>
            <type>pom</type>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.logging.log4j/log4j-core -->
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.1</version>
        </dependency> 
    </dependencies>

github地址：https://github.com/huangyueranbbc/HBasePrePartition