Hadoop期末复习贴-Hbase过滤器

最新推荐文章于 2022-12-28 18:48:17 发布

梏十一郎

最新推荐文章于 2022-12-28 18:48:17 发布

阅读量618

点赞数 2

分类专栏：大数据期末复习文章标签： big data hadoop 大数据

本文链接：https://blog.csdn.net/weixin_45629803/article/details/122212736

版权

大数据期末复习专栏收录该内容

6 篇文章 0 订阅

订阅专栏

若本文对你有帮助，请记得点赞、关注我喔！

Hbase过滤器

行过滤器RowFilter

为什么需要过滤器？因为之前只用get和scan时，没有办法精确地得到我们想要的数据。虽然get和scan都提供了addColumn来限定返回的列，但它只能按列筛选，远不如下面介绍的Filter灵活。

过滤器分两步

创建过滤器

Filter filter = new RowFilter(op,Object)

op是运算符，Object是比较对象
设置过滤器

get/scan.setFilter(filter)

下面来看看行过滤器的代码

package Filter;

import org.apache.hadoop.conf.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Demonstrates filtering an HBase table by row key with
 * {@code org.apache.hadoop.hbase.filter.RowFilter}.
 *
 * <p>{@code Scan.addColumn(family, qualifier)} only restricts which column is
 * returned; every row that has that column still comes back. To select rows
 * by their key, a filter is used in two steps:
 * <ol>
 *   <li>create the filter: {@code new RowFilter(operator, comparator)};</li>
 *   <li>attach it to the request: {@code scan.setFilter(filter)}.</li>
 * </ol>
 *
 * <p>This class has the same simple name as the HBase filter, so the HBase
 * class is always referenced by its fully-qualified name below.
 */
public class RowFilter {
    public static void main(String[] args) throws Exception {
        query("new_table");
    }

    /**
     * Prints the row key of {@code result} followed by one line per cell in
     * the form {@code family:qualifier value}.
     *
     * @param result a row returned by a Get or Scan; a {@code null} or empty
     *               result (no matching cells) prints nothing
     */
    public static void printResult(Result result) throws Exception {
        // An empty Result has a null row key and a null cell list, so it must
        // be skipped before either is dereferenced.
        if (result == null || result.isEmpty()) {
            return;
        }
        System.out.println("row:" + Bytes.toString(result.getRow()));
        for (Cell cell : result.listCells()) {
            String family = Bytes.toString(CellUtil.cloneFamily(cell));
            String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            String value = Bytes.toString(CellUtil.cloneValue(cell));
            System.out.println(family + ":" + qualifier + " " + value);
        }
    }

    /**
     * Runs three row-filtered scans against the table and prints every
     * matching row:
     * <ul>
     *   <li>{@code info:name} of the row whose key equals {@code "1001"};</li>
     *   <li>{@code school_info:college} of rows whose key is &gt;= {@code "2020"};</li>
     *   <li>{@code basic_info:name} of rows whose key is &lt;= {@code "2018"}.</li>
     * </ul>
     * Row keys are compared as raw bytes, i.e. lexicographically.
     *
     * <p>NOTE(review): the three scans name different column families; all of
     * them must exist in the target table - confirm against the table schema.
     *
     * @param tName name of the HBase table to scan
     */
    public static void query(String tName) throws Exception {
        // HBaseConfiguration.create() also loads hbase-default.xml/hbase-site.xml;
        // the configuration is passed on explicitly instead of being left unused.
        Configuration configuration = HBaseConfiguration.create();

        // Connection and Table hold client resources and are closed even when
        // a scan fails.
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf(tName))) {
            scanAndPrint(table, "info", "name", CompareOperator.EQUAL, "1001");
            scanAndPrint(table, "school_info", "college", CompareOperator.GREATER_OR_EQUAL, "2020");
            scanAndPrint(table, "basic_info", "name", CompareOperator.LESS_OR_EQUAL, "2018");
        }
    }

    /**
     * Scans one column of {@code table}, keeping only rows whose key satisfies
     * {@code op} against {@code rowKey}, and prints each matching row.
     */
    private static void scanAndPrint(Table table, String family, String qualifier,
                                     CompareOperator op, String rowKey) throws Exception {
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));

        // First argument: the comparison operator; second: what to compare
        // each row key against.
        Filter filter = new org.apache.hadoop.hbase.filter.RowFilter(op,
                new BinaryComparator(Bytes.toBytes(rowKey)));
        scan.setFilter(filter);

        // The scanner is released as soon as iteration ends or fails.
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                printResult(result);
            }
        }
    }
}

CompareOperator op
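op 是 CompareOperator 枚举，可取的值有：LESS（小于）、LESS_OR_EQUAL（小于等于）、EQUAL（等于）、NOT_EQUAL（不等于）、GREATER_OR_EQUAL（大于等于）、GREATER（大于）、NO_OP（不做比较）。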

多种过滤器

先上來王炸

Get和Scan的不同：Get是通过指定的RowKey来返回一行数据,而Scan是通过特定的条件来返回一批数据。然后两者生成的对象中都有.setFilter方法，即两者都可以和Filter配合使用。
Get结果产生：Result = table.get(get)
Scan结果产生：ResultScanner = table.getScanner(scan)
故二者最后输出结果的方式也不同：前者直接调用printResult函数即可；后者包含多条Result类型的数据，需要循环输出（for (Result res : resultScanner) …）。

列族过滤器FamilyFilter

       // Build a column-family filter so that only the school_info family of row "2018" is returned.
        // EQUAL + BinaryComparator compares the family name against the given bytes.
        Filter filter = new FamilyFilter(CompareOperator.EQUAL,
                new BinaryComparator(Bytes.toBytes("school_info")));
        // A Get addresses a single row by its row key.
        Get get = new Get(Bytes.toBytes("2018"));
        get.setFilter(filter);
        // A Get yields a single Result, so it is printed directly without a loop.
        Result result = table.get(get);
        printResult(result);

列名过滤器QualifierFilter

		// Scan: build a column-qualifier filter that keeps the cells whose qualifier is "name".
        // NOTE(review): this snippet only configures the scan; it is never passed to
        // table.getScanner(scan), so no rows are read from it here.
        Scan scan = new Scan();
        Filter filter1 = new QualifierFilter(CompareOperator.EQUAL,
                new BinaryComparator(Bytes.toBytes("name")));
        scan.setFilter(filter1);


        // Get: fetch the cells of row "2018" whose qualifier is "name".
        Get get1 = new Get(Bytes.toBytes("2018"));
        Filter filter2 = new QualifierFilter(CompareOperator.EQUAL,
                new BinaryComparator(Bytes.toBytes("name")));
        get1.setFilter(filter2);
        // A Get yields a single Result, so it is printed directly without a loop.
        Result result1 = table.get(get1);
        printResult(result1);

值过滤器

值过滤器ValueFilter，可以帮助用户筛选某个特定值的单元格
特别注意：SubstringComparator的构造参数是String类型，而不是byte[]，因此不需要用Bytes.toBytes转换。

/**
 * Scans the whole table and prints every row that has at least one cell
 * whose value contains the substring {@code "Ha"}.
 *
 * <p>{@code ValueFilter} selects individual cells by their value. Note that
 * {@code SubstringComparator} takes a {@code String}, not a {@code byte[]},
 * and is meant to be combined with {@code EQUAL} / {@code NOT_EQUAL}.
 *
 * @param table the open HBase table to scan; it is not closed by this method
 */
public static void ValueFilterQuery(Table table) throws Exception {
    Filter filter3 = new ValueFilter(CompareOperator.EQUAL,
            new SubstringComparator("Ha"));
    Scan scan = new Scan();
    scan.setFilter(filter3);

    // A ResultScanner holds client/server resources; close it even if
    // printing a row fails.
    try (ResultScanner results = table.getScanner(scan)) {
        // A scan yields many Result objects, so they are printed one by one.
        for (Result re : results) {
            printResult(re);
        }
    }
}