HBase模糊查询优化 - 并发查询

最新推荐文章于 2024-04-26 02:30:00 发布

春_

最新推荐文章于 2024-04-26 02:30:00 发布

阅读量1.6k

点赞数

分类专栏： HBase 文章标签： hbase hadoop 大数据

本文链接：https://blog.csdn.net/weixin_43736084/article/details/127048478

版权

HBase 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

HBase模糊查询优化 - 并发查询

本文是HBase查询优化的续篇：经过上次优化后，查询速度仍然很慢。
这次优化我们改用并发查询：先查看HBase库里各region的拆分情况，再把要查询的rowkey范围切分成多段，每一段单独到不同的region中查询，从而通过并发查询来提升查询速度。

优化后经过测试查询速度大大提升！

代码如下

package query;

import main.TaskExecutors;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;

/**
 * Runs one row-key prefix query against HBase as several concurrent range scans.
 *
 * <p>HBase keeps row keys in lexicographic byte order. A single-threaded prefix
 * query is typically one scan from {@code rowkey + '0'} to {@code rowkey + 'z'}.
 * This class splits that span into caller-supplied character ranges, for example
 * {@code '0'..'9'}, {@code 'A'..'Z'} and {@code 'a'..'z'}, builds one {@link Scan}
 * per range and submits each scan to the executor as its own task.</p>
 *
 * <p>Both ends of a range are inclusive at the character level: the range
 * {@code '4'..'9'} covers every row whose key starts with {@code rowkey + '4'}
 * through {@code rowkey + '9'}, whatever follows that character.</p>
 *
 * @author chun
 * @date 2022/7/21 16:48
 */

public class ConcurrentScanHBase {
    private final ExecutorService pool;

    private final String rowKey;

    // Pair is a project-local class. javafx.util.Pair would also work, but it is not
    // shipped with OpenJDK builds, which is why the class was copied into the project.
    private final List<Pair<Character, Character>> rowkeyRanges;

    private final List<Scan> scanList = new ArrayList<>();

    private final SetScan setScan;

    // One count per scan; released when the corresponding task finishes (normally or not).
    private final CountDownLatch countDownLatch;

    /**
     * Builds one scan per entry of {@code rowkeyRanges}.
     *
     * @param pool         executor that will run the scans
     * @param rowkey       common row-key prefix of the rows to fetch
     * @param rowkeyRanges first/last character (both inclusive) that may follow the prefix,
     *                     one pair per concurrent scan
     * @param setScan      callback applied to every scan to configure everything except
     *                     the start and stop row
     * @throws NullPointerException if any argument is {@code null}
     */
    public ConcurrentScanHBase(ExecutorService pool, String rowkey, List<Pair<Character, Character>> rowkeyRanges, SetScan setScan) {
        // Checked first and with the historical message so existing callers see the same error.
        this.setScan = Objects.requireNonNull(setScan, "SetScan is NULL");
        this.pool = Objects.requireNonNull(pool, "pool is NULL");
        this.rowKey = Objects.requireNonNull(rowkey, "rowkey is NULL");
        this.rowkeyRanges = Objects.requireNonNull(rowkeyRanges, "rowkeyRanges is NULL");

        for (Pair<Character, Character> range : rowkeyRanges) {
            scanList.add(buildScan(range.getKey(), range.getValue()));
        }
        this.countDownLatch = new CountDownLatch(scanList.size());
    }

    /**
     * Blocks the calling thread until every task submitted by {@link #exec(ExecScan)} has finished.
     *
     * @throws RuntimeException wrapping the {@link InterruptedException} if the wait is
     *                          interrupted; the thread's interrupt flag is restored first
     */
    public void await() {
        try {
            this.countDownLatch.await();
        } catch (InterruptedException e) {
            // Keep the interrupt visible to callers further up the stack.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }

    /**
     * Submits one task per prepared scan to the executor.
     *
     * @param execScan callback that actually runs a scan and consumes its results
     */
    public void exec(ExecScan execScan) {
        for (Scan scan : scanList) {
            pool.execute(() -> {
                try {
                    execScan.exec(scan);
                } finally {
                    // Always release the latch, otherwise a failing callback would
                    // leave await() blocked forever.
                    countDownLatch.countDown();
                }
            });
        }
    }

    /**
     * Creates the scan for one character range. Only the start row, stop row and prefix
     * filter are set here; everything else is delegated to the {@link SetScan} callback.
     */
    private Scan buildScan(Character firstChar, Character lastChar) {
        byte[] startRow = (rowKey + firstChar).getBytes(StandardCharsets.UTF_8);
        byte[] lastPrefix = (rowKey + lastChar).getBytes(StandardCharsets.UTF_8);

        Scan scan = new Scan();
        scan.withStartRow(startRow);
        // The stop row is exclusive. Using rowKey + lastChar directly would skip every row
        // that merely starts with it (e.g. "key9abc" for the range '4'..'9'), so stop at
        // the first key that sorts after all rows carrying that prefix.
        scan.withStopRow(firstRowAfterPrefix(lastPrefix));
        scan.setFilter(new PrefixFilter(rowKey.getBytes(StandardCharsets.UTF_8)));
        setScan.initScan(scan);
        return scan;
    }

    /**
     * Returns the smallest row key that sorts after every key starting with {@code prefix}:
     * trailing 0xFF bytes are dropped and the last remaining byte is incremented.
     * An empty array is returned when no such key exists (prefix empty or all 0xFF);
     * HBase treats an empty stop row as "scan to the end of the table".
     */
    private static byte[] firstRowAfterPrefix(byte[] prefix) {
        int end = prefix.length;
        while (end > 0 && prefix[end - 1] == (byte) 0xFF) {
            end--;
        }
        if (end == 0) {
            return new byte[0];
        }
        byte[] stopRow = Arrays.copyOf(prefix, end);
        stopRow[end - 1]++;
        return stopRow;
    }
}

匿名内部类接口

package query;

import org.apache.hadoop.hbase.client.Scan;

/**
 * Callback that carries out a table scan.
 * Callers supply the implementation (for example as an anonymous inner class)
 * to define how the given scan is executed and how its results are handled.
 *
 * @author chun
 * @date 2022/7/21 17:52
 */
public interface ExecScan {
    /**
     * Executes the given scan.
     *
     * @param scan the prepared scan to run
     */
    void exec(Scan scan);
}

package query;

import org.apache.hadoop.hbase.client.Scan;

/**
 * Callback used by ConcurrentScanHBase while it builds its scans.
 * It lets each caller initialise a Scan in its own way, typically through an
 * anonymous inner class, setting every option other than the start row and stop row.
 *
 * @author chun
 * @date 2022/7/21 17:52
 */
public interface SetScan {
    /**
     * Applies caller-specific settings to the given scan.
     *
     * @param scan the scan to configure
     */
    void initScan(Scan scan);
}

Pair类

package query;

import javafx.beans.NamedArg;

import java.io.Serializable;

/**
 * A simple key/value pair, copied into the project so that it does not depend on
 * {@code javafx.util.Pair}.
 *
 * <p>The constructor no longer carries the JavaFX {@code @NamedArg} annotations, so the
 * class itself needs nothing outside {@code java.*}; the {@code javafx.beans.NamedArg}
 * import of this file is now unused and can be dropped on JDKs without JavaFX.</p>
 *
 * <p>The fields are intentionally left non-final: their modifiers take part in the
 * default {@code serialVersionUID} computation, and changing them would break
 * compatibility with already-serialized instances.</p>
 *
 * @author chun
 * @date 2022/7/22 10:10
 */
public class Pair<K, V> implements Serializable {

    /**
     * Key of this <code>Pair</code>.
     */
    private K key;

    /**
     * Gets the key for this pair.
     *
     * @return key for this pair
     */
    public K getKey() {
        return key;
    }

    /**
     * Value of this <code>Pair</code>.
     */
    private V value;

    /**
     * Gets the value for this pair.
     *
     * @return value for this pair
     */
    public V getValue() {
        return value;
    }

    /**
     * Creates a new pair.
     *
     * @param key   The key for this pair, may be <code>null</code>
     * @param value The value to use for this pair, may be <code>null</code>
     */
    public Pair(K key, V value) {
        this.key = key;
        this.value = value;
    }

    /**
     * <p><code>String</code> representation of this
     * <code>Pair</code>.</p>
     *
     * <p>The default name/value delimiter '=' is always used.</p>
     *
     * @return <code>String</code> representation of this <code>Pair</code>
     */
    @Override
    public String toString() {
        return key + "=" + value;
    }

    /**
     * <p>Generate a hash code for this <code>Pair</code>.</p>
     *
     * <p>The hash code is calculated using both the key and
     * the value of the <code>Pair</code>; a <code>null</code> key or value
     * contributes 0, matching the null handling of {@link #equals(Object)}.</p>
     *
     * @return hash code for this <code>Pair</code>
     */
    @Override
    public int hashCode() {
        // The key's hashCode is multiplied by an arbitrary prime number (13)
        // in order to make sure there is a difference in the hashCode between
        // these two parameters:
        //  key: a  value: aa
        //  key: aa value: a
        int keyHash = key == null ? 0 : key.hashCode();
        int valueHash = value == null ? 0 : value.hashCode();
        return keyHash * 13 + valueHash;
    }

    /**
     * <p>Test this <code>Pair</code> for equality with another
     * <code>Object</code>.</p>
     *
     * <p>If the <code>Object</code> to be tested is not a
     * <code>Pair</code> or is <code>null</code>, then this method
     * returns <code>false</code>.</p>
     *
     * <p>Two <code>Pair</code>s are considered equal if and only if
     * both the keys and values are equal.</p>
     *
     * @param o the <code>Object</code> to test for
     *          equality with this <code>Pair</code>
     * @return <code>true</code> if the given <code>Object</code> is
     * equal to this <code>Pair</code> else <code>false</code>
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Pair)) {
            return false;
        }
        Pair<?, ?> other = (Pair<?, ?>) o;
        return sameOrBothNull(key, other.key) && sameOrBothNull(value, other.value);
    }

    /** Null-safe equality: true when both are null or {@code a.equals(b)}. */
    private static boolean sameOrBothNull(Object a, Object b) {
        return a == null ? b == null : a.equals(b);
    }
}

使用


    /**
     * Usage example: fetches rows whose key starts with "baidu.com" using eight
     * concurrent scans, prints the cell values of column family "D" and waits for
     * all scans to finish.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // One pair per concurrent scan: first and last character allowed right after the prefix.
        List<Pair<Character, Character>> pairs = new ArrayList<>();
        pairs.add(new Pair<>('-', '3'));
        pairs.add(new Pair<>('4', '9'));
        pairs.add(new Pair<>('A', 'J'));
        pairs.add(new Pair<>('K', 'T'));
        pairs.add(new Pair<>('U', 'Z'));
        pairs.add(new Pair<>('a', 'j'));
        pairs.add(new Pair<>('k', 't'));
        pairs.add(new Pair<>('u', 'z'));

        ConcurrentScanHBase concurrentScanHBase = new ConcurrentScanHBase(TaskExecutors.getPool(), "baidu.com", pairs, scan -> {
            scan.setCacheBlocks(false);
            // NOTE(review): setBatch caps the number of cells per Result; if the intent was
            // "rows fetched per RPC", setCaching is the relevant setting - confirm.
            scan.setBatch(6000);
            scan.addFamily(Bytes.toBytes("D"));
        });

        concurrentScanHBase.exec(scan -> {
            try (Table table = HBaseHelper.getConnection().getTable(TableName.valueOf(HBaseHelper.TABLE_NAME));
                 ResultScanner scanner = table.getScanner(scan)) {
                // Demo limit: read at most three batches of up to 6000 results per scan.
                int index = 0;
                for (Result[] results = scanner.next(6000); results.length != 0 && index++ < 3; results = scanner.next(6000)) {
                    for (Result result : results) {
                        for (Cell cell : result.rawCells()) {
                            try {
                                // Print the value's content; printing the byte[] itself would
                                // only show the array's identity (e.g. "[B@1b6d3586").
                                System.out.println(Bytes.toStringBinary(cell.getValueArray(), cell.getValueOffset(),
                                        cell.getValueLength()));
                            } catch (Exception e) {
                                // Best effort: a bad cell must not abort the rest of the scan.
                                e.printStackTrace();
                            }
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        });

        try {
            // Block until every scan task has completed.
            concurrentScanHBase.await();
        } catch (RuntimeException e) {
            e.printStackTrace();
        }
    }