Hbase葱岭探秘--过滤器Api

最新推荐文章于 2021-03-29 10:40:11 发布

原创最新推荐文章于 2021-03-29 10:40:11 发布 · 2.8k 阅读

4 ·

CC 4.0 BY-SA版权

文章标签：

#hbase #大数据 #Hbase过滤器

Hbase 专栏收录该内容

6 篇文章

订阅专栏

本文详细介绍了HBase中各种过滤器的功能与用法，包括行过滤器、列族过滤器、列名过滤器等，并提供了具体的示例代码，有助于读者更好地理解和应用。

HBase 提供了许多过滤器接口，用于在服务端对数据进行过滤，从而只查询出想要的数据。

行过滤器

针对行键进行过滤，比较器可以采用前缀匹配、按位与/或/异或、正则匹配以及子串匹配等方式。同时可以通过 EQUAL、NOT_EQUAL 等比较选项来控制筛选数据的条件。

/**
     * Demonstrates {@code RowFilter} against table "testtable" using three comparators
     * in turn, each with {@code CompareOp.EQUAL}: a {@code BinaryComparator} on "row2",
     * a {@code RegexStringComparator} with pattern {@code .*.2}, and a
     * {@code SubstringComparator} on "w2". Every matching {@code Result} is logged.
     *
     * <p>Notes carried over from the original comment on the available comparators:
     * {@code NullComparator} checks for a null value; {@code BitComparator} compares at
     * bit level via the AND / OR / XOR operations of {@code BitwiseOp};
     * {@code RegexStringComparator} matches by regular expression;
     * {@code SubstringComparator} matches when the given substring is contained.
     *
     * <p>Each scanner and the table are closed in {@code finally} blocks so they are
     * released even when a scan throws. An {@code IOException} is logged, not rethrown.
     */
    private static void testRowFilter() {
        try {
            HTable table = new HTable(config, "testtable");
            try {
                // The same Scan is reused for all three filters; only col1:name is read.
                Scan scan = new Scan();
                scan.addColumn("col1".getBytes(), "name".getBytes());

                // Row filter with an exact binary comparison of the row key.
                Filter filter = new RowFilter(CompareOp.EQUAL,
                        new BinaryComparator("row2".getBytes()));
                scan.setFilter(filter);
                ResultScanner result = table.getScanner(scan);
                try {
                    for (Result res : result) {
                        log.info("行过滤器>" + res);
                    }
                } finally {
                    result.close();
                }

                // Row filter with a regular-expression comparison.
                Filter filter2 = new RowFilter(CompareOp.EQUAL,
                        new RegexStringComparator(".*.2"));
                scan.setFilter(filter2);
                ResultScanner resultRegx = table.getScanner(scan);
                try {
                    for (Result res : resultRegx) {
                        log.info("正则>" + res);
                    }
                } finally {
                    resultRegx.close();
                }

                // Row filter with a substring comparison.
                Filter filterSubString = new RowFilter(CompareOp.EQUAL,
                        new SubstringComparator("w2"));
                scan.setFilter(filterSubString);
                ResultScanner resultSubString = table.getScanner(scan);
                try {
                    for (Result res : resultSubString) {
                        log.info("子串>" + res);
                    }
                } finally {
                    resultSubString.close();
                }
            } finally {
                table.close();
            }
        } catch (IOException e) {
            log.error(e);
        }
    }

列族过滤器

根据列族进行筛选，用法与上面的行过滤器类似：通过比较运算符和比较器设置筛选条件即可。

/**
     * Demonstrates {@code FamilyFilter} against table "testtable".
     *
     * <p>Two scans are run, both constructed with start row "row2": the first keeps
     * column families that binary-equal "col1"; the second restricts the scan to family
     * "col1" and applies a {@code RegexStringComparator} with pattern {@code .*.1}.
     * Every returned {@code Result} is logged.
     *
     * <p>Scanners and the table are closed in {@code finally} blocks so they are
     * released even when a scan throws. An {@code IOException} is logged, not rethrown.
     */
    private static void testFamlyFilter() {

        try {
            HTable table = new HTable(config, "testtable");
            try {
                Filter filter = new FamilyFilter(CompareOp.EQUAL,
                        new BinaryComparator("col1".getBytes()));
                Scan scan = new Scan("row2".getBytes(), filter);
                ResultScanner result = table.getScanner(scan);
                try {
                    for (Result res : result) {
                        log.info(res);
                    }
                } finally {
                    result.close();
                }

                Filter filterNull = new FamilyFilter(CompareOp.EQUAL,
                        new RegexStringComparator(".*.1"));
                Scan scanNull = new Scan("row2".getBytes(), filterNull);
                scanNull.addFamily("col1".getBytes());
                ResultScanner resultNull = table.getScanner(scanNull);
                // The null check is kept from the original demo.
                if (resultNull != null) {
                    try {
                        for (Result res : resultNull) {
                            log.info(res);
                        }
                    } finally {
                        resultNull.close();
                    }
                } else {
                    log.info("null");
                }
            } finally {
                table.close();
            }
        } catch (IOException e) {
            log.error(e);
        }

    }

列名过滤器

和上面几个过滤器类似，这里是根据列名（列限定符）进行筛选，设置好比较条件后即可过滤出对应的列。

/**
     * Demonstrates {@code QualifierFilter} (column-name filter) against table "testtable".
     *
     * <p>The filter keeps columns whose qualifier binary-equals "name". It is applied
     * first to a {@code Scan} constructed with start row "row2", then to a {@code Get}
     * of row "row2"; all results are logged.
     *
     * <p>The scanner and the table are closed in {@code finally} blocks. An
     * {@code IOException} is logged at error level, matching {@code testRowFilter},
     * and is not rethrown.
     */
    public static void testColumFilter() {

        try {
            HTable table = new HTable(config, "testtable");
            try {
                Filter filter = new QualifierFilter(CompareOp.EQUAL,
                        new BinaryComparator("name".getBytes()));
                Scan scan = new Scan("row2".getBytes(), filter);
                ResultScanner result = table.getScanner(scan);
                try {
                    for (Result res : result) {
                        log.info(res);
                    }
                } finally {
                    result.close();
                }

                // The same filter applied to a single-row Get.
                Get get = new Get("row2".getBytes());
                get.setFilter(filter);
                Result resultGet = table.get(get);
                log.info(resultGet);
            } finally {
                table.close();
            }
        } catch (IOException e) {
            log.error(e);
        }

    }

参考列过滤器

参考列过滤器根据列族和列限定符进行筛选，返回与参考列相同时间戳的行的所有键值对。

/**
     * Demonstrates {@code DependentColumnFilter} against table "testtable", using
     * col1:name as the reference column.
     *
     * <p>The filter is applied to a full-table {@code Scan} and then to a {@code Get}
     * of row "row2"; all results are logged.
     * NOTE(review): the third constructor argument ({@code false}) is presumably the
     * "drop dependent column" flag — confirm against the HBase version in use.
     *
     * <p>The scanner and the table are closed in {@code finally} blocks. An
     * {@code IOException} is logged, not rethrown.
     */
    public static void testDependentColumnFilter() {

        try {
            HTable table = new HTable(config, "testtable");
            try {
                Filter filter = new DependentColumnFilter("col1".getBytes(),
                        "name".getBytes(), false);
                Scan scan = new Scan();
                scan.setFilter(filter);
                ResultScanner resu = table.getScanner(scan);
                try {
                    for (Result result : resu) {
                        log.info(result);
                    }
                } finally {
                    resu.close();
                }

                // The same filter applied to a single-row Get.
                Get get = new Get("row2".getBytes());
                get.setFilter(filter);
                Result result = table.get(get);
                log.info(result);
            } finally {
                table.close();
            }
        } catch (IOException e) {
            log.error(e);
        }

    }

单列过滤器

根据某一列的值来判断该行是否需要被过滤。

/**
     * Demonstrates {@code SingleColumnValueFilter} against table "testtable".
     *
     * <p>The filter compares the value of column col1:name with "wy" using
     * {@code CompareOp.EQUAL}. It is applied to a full-table {@code Scan} and then to
     * a {@code Get} of row "row2"; all results are logged.
     *
     * <p>The scanner and the table are closed in {@code finally} blocks. An
     * {@code IOException} is logged at error level, matching {@code testRowFilter},
     * and is not rethrown.
     */
    public static void testSingleColumnValueFilter() {

        try {
            HTable table = new HTable(config, "testtable");
            try {
                Filter filter = new SingleColumnValueFilter("col1".getBytes(),
                        "name".getBytes(), CompareOp.EQUAL, "wy".getBytes());
                Scan scan = new Scan();
                scan.setFilter(filter);
                ResultScanner result = table.getScanner(scan);
                try {
                    for (Result res : result) {
                        log.info(res);
                    }
                } finally {
                    result.close();
                }

                // The same filter applied to a single-row Get.
                Get get = new Get("row2".getBytes());
                get.setFilter(filter);
                Result resultGet = table.get(get);
                log.info(resultGet);
            } finally {
                table.close();
            }
        } catch (IOException e) {
            log.error(e);
        }
    }

前缀过滤器

根据行键的前缀进行匹配，本例返回的是行键以 row 为前缀的所有行。

/**
     * Demonstrates {@code PrefixFilter} against table "testtable" with prefix "row".
     *
     * <p>The filter is applied to a full-table {@code Scan} and then to a {@code Get}
     * of row "row2"; results are logged with the prefixes "res>" and "get>".
     * The original never attached the filter to the {@code Get}, unlike the sibling
     * demos, so the Get did not exercise the filter at all; it is attached here.
     *
     * <p>The scanner and the table are closed in {@code finally} blocks. An
     * {@code IOException} is logged at error level, matching {@code testRowFilter},
     * and is not rethrown.
     */
    public static void testPrefixFilter() {

        try {
            HTable table = new HTable(config, "testtable");
            try {
                Filter filter = new PrefixFilter("row".getBytes());
                Scan scan = new Scan();

                scan.setFilter(filter);
                ResultScanner result = table.getScanner(scan);
                try {
                    for (Result res : result) {
                        log.info("res>" + res);
                    }
                } finally {
                    result.close();
                }

                // The same filter applied to a single-row Get.
                Get get = new Get("row2".getBytes());
                get.setFilter(filter);
                Result resultGet = table.get(get);
                log.info("get>" + resultGet);
            } finally {
                table.close();
            }
        } catch (IOException e) {
            log.error(e);
        }
    }

分页过滤器

通过 PageFilter 设置每一页返回的行数。注意：翻页时需要重新设置起始行，新的起始行必须与上一页最后一行的行键不同（例如在其后追加一个 0 字节），否则会一直重复读取同一行，造成死循环无法停止。

/**
     * Demonstrates paging through table "testtable" with a {@code PageFilter} of size 10.
     *
     * <p>Each pass of the outer loop opens a new scanner, logs every row it yields, and
     * remembers the last row key seen. The next pass starts at that key with
     * {@code POSTFIX} appended. The loop ends when a pass yields no rows, after which
     * the total row count is logged.
     *
     * <p>The per-page scanner is closed in a {@code finally} block; the original opened
     * a new scanner per page and never closed any of them. The table is also closed in
     * {@code finally}. An {@code IOException} is logged at error level, matching
     * {@code testRowFilter}, and is not rethrown.
     */
    public static void testPageFilter() {

        try {
            HTable table = new HTable(config, "testtable");
            try {
                Filter filter = new PageFilter(10);
                int totalRows = 0;
                byte[] lastRow = null;
                Scan scan = new Scan();
                // The filter is the same for every page, so it is set once up front.
                scan.setFilter(filter);

                while (true) {
                    if (lastRow != null) {
                        // Append POSTFIX so the next page starts after the last row
                        // already returned; without it the same row would be read
                        // again and the loop would never end.
                        // presumably POSTFIX is a single zero byte (the original
                        // comment says "add 0") — it is defined outside this block.
                        byte[] startRow = Bytes.add(lastRow, POSTFIX);
                        scan.setStartRow(startRow);
                    }

                    int localRows = 0;
                    ResultScanner resultScan = table.getScanner(scan);
                    try {
                        for (Result result : resultScan) {
                            log.info(result);
                            localRows++;
                            totalRows++;
                            lastRow = result.getRow();
                        }
                    } finally {
                        resultScan.close();
                    }

                    // An empty page means the table has been exhausted.
                    if (localRows == 0) {
                        break;
                    }
                }
                log.info(totalRows);
            } finally {
                table.close();
            }
        } catch (IOException e) {
            log.error(e);
        }

    }

/**
     * Demonstrates {@code ColumnPaginationFilter} against table "testtable".
     *
     * <p>A full-table {@code Scan} is run with {@code new ColumnPaginationFilter(5, 10)}
     * and every returned {@code Result} is logged.
     * NOTE(review): per the HBase API the arguments are presumably (limit, offset),
     * i.e. at most 5 columns per row starting at column offset 10 — confirm against the
     * HBase version in use.
     *
     * <p>The scanner and the table are closed in {@code finally} blocks. An
     * {@code IOException} is logged at error level, matching {@code testRowFilter},
     * and is not rethrown.
     */
    public static void testColumnPaginationFilter() {

        try {
            HTable table = new HTable(config, "testtable");
            try {
                Filter filter = new ColumnPaginationFilter(5, 10);
                Scan scan = new Scan();
                scan.setFilter(filter);

                ResultScanner result = table.getScanner(scan);
                try {
                    for (Result res : result) {
                        log.info(res);
                    }
                } finally {
                    result.close();
                }
            } finally {
                table.close();
            }
        } catch (IOException e) {
            log.error(e);
        }
    }

Skip过滤器

SkipFilter 通常包装另一个过滤器（如 ValueFilter）一起使用：只要一行中有某一列不符合被包装过滤器的要求，整行数据都会被过滤掉。

/**
     * Demonstrates {@code SkipFilter} against table "testtable" by running two scans
     * for comparison.
     *
     * <p>The first scan uses a plain {@code ValueFilter} ({@code NOT_EQUAL} to "v") and
     * logs results prefixed with "&lt;". The second wraps the same filter in a
     * {@code SkipFilter} and logs results prefixed with "&gt;". Per the accompanying
     * article, the wrapped form drops a whole row as soon as one of its columns fails
     * the inner filter.
     *
     * <p>Both scanners and the table are closed in {@code finally} blocks. An
     * {@code IOException} is logged at error level, matching {@code testRowFilter},
     * and is not rethrown.
     */
    public static void testSkipFilter() {

        try {
            HTable table = new HTable(config, "testtable");
            try {
                // Baseline: the value filter on its own.
                Filter filt = new ValueFilter(CompareOp.NOT_EQUAL,
                        new BinaryComparator("v".getBytes()));
                Scan scanValue = new Scan();
                scanValue.setFilter(filt);
                ResultScanner ress = table.getScanner(scanValue);
                try {
                    for (Result result : ress) {
                        log.info("<" + result);
                    }
                } finally {
                    ress.close();
                }

                // The same value filter wrapped in a SkipFilter.
                Filter filter = new SkipFilter(filt);
                Scan scan = new Scan();
                scan.setFilter(filter);
                ResultScanner result = table.getScanner(scan);
                try {
                    for (Result res : result) {
                        log.info(">" + res);
                    }
                } finally {
                    result.close();
                }
            } finally {
                table.close();
            }
        } catch (IOException e) {
            log.error(e);
        }
    }

全匹配过滤器

WhileMatchFilter 包装另一个过滤器使用：只要数据通过被包装的过滤器就持续返回，一旦遇到第一条未通过的数据，整个扫描立即结束。本例中会返回 row6 之前的所有行，扫描到 row6 时停止。


    /**
     * Demonstrates {@code WhileMatchFilter} against table "testtable" by running two
     * scans for comparison.
     *
     * <p>The first scan uses a plain {@code RowFilter} ({@code NOT_EQUAL} to "row6")
     * and logs results prefixed with "&gt;". The second reuses the same {@code Scan}
     * with that filter wrapped in a {@code WhileMatchFilter} and logs results prefixed
     * with "&lt;".
     *
     * <p>Both scanners and the table are closed in {@code finally} blocks. An
     * {@code IOException} is logged at error level, matching {@code testRowFilter},
     * and is not rethrown.
     */
    public static void testWhileMatch() {

        try {
            HTable table = new HTable(config, "testtable");
            try {
                // Baseline: the row filter on its own.
                Filter filt = new RowFilter(CompareOp.NOT_EQUAL,
                        new BinaryComparator("row6".getBytes()));

                Scan scan = new Scan();
                scan.setFilter(filt);
                ResultScanner results = table.getScanner(scan);
                try {
                    for (Result res : results) {
                        log.info(">" + res);
                    }
                } finally {
                    results.close();
                }

                // The same row filter wrapped in a WhileMatchFilter.
                Filter filter = new WhileMatchFilter(filt);
                scan.setFilter(filter);
                ResultScanner resultScan = table.getScanner(scan);
                try {
                    for (Result res : resultScan) {
                        log.info("<" + res);
                    }
                } finally {
                    resultScan.close();
                }
            } finally {
                table.close();
            }
        } catch (IOException e) {
            log.error(e);
        }

    }

过滤器组合

可以将上面的多个过滤器放在一个 List 中，构造成 FilterList，以多个过滤器组合的形式进行过滤。


    /**
     * Demonstrates combining filters with {@code FilterList} against table "testtable".
     *
     * <p>Two filters are combined: a {@code SingleColumnValueFilter} requiring col1:name
     * to equal "x", and a {@code RowFilter} requiring the row key to be
     * {@code NOT_EQUAL} to "row2". The list is applied to a full-table {@code Scan} and
     * every returned {@code Result} is logged.
     * NOTE(review): the single-argument {@code FilterList} constructor presumably
     * requires all filters to pass (MUST_PASS_ALL) — confirm against the HBase version
     * in use.
     *
     * <p>The scanner and the table are closed in {@code finally} blocks. An
     * {@code IOException} is logged at error level, matching {@code testRowFilter},
     * and is not rethrown.
     */
    public static void testFilterList() {

        List<Filter> filterList = new ArrayList<Filter>();
        Filter filter1 = new SingleColumnValueFilter("col1".getBytes(),
                "name".getBytes(), CompareOp.EQUAL, "x".getBytes());
        filterList.add(filter1);

        Filter filter2 = new RowFilter(CompareOp.NOT_EQUAL,
                new BinaryComparator("row2".getBytes()));
        filterList.add(filter2);

        FilterList filters = new FilterList(filterList);
        Scan scan = new Scan();
        scan.setFilter(filters);

        try {
            HTable table = new HTable(config, "testtable");
            try {
                ResultScanner result = table.getScanner(scan);
                try {
                    for (Result res : result) {
                        log.info(res);
                    }
                } finally {
                    result.close();
                }
            } finally {
                table.close();
            }
        } catch (IOException e) {
            log.error(e);
        }
    }