HBase 过滤器 Java API

8 篇文章 1 订阅

HBase 过滤器 Java API

在IDEA中导入Maven依赖

    <!-- Maven dependencies needed to compile and run the HBase filter examples below. -->
    <dependencies>
        <!-- HBase client artifact, version 1.4.6. -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.4.6</version>
        </dependency>

        <!-- HBase server artifact, kept at the same version as the client. -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.4.6</version>
        </dependency>

        <!-- JUnit 4; the examples use its @Test, @Before and @After annotations. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.8.2</version>
        </dependency>
    </dependencies>

批量插入学生表stu数据

    /**
     * Creates the {@code stu} table with a single {@code info} column family if it does
     * not exist yet, then loads every line of {@code data/students.txt} into it.
     *
     * <p>Each line is split on commas into id, name, age, gender and clazz. The id becomes
     * the row key and the remaining four fields become columns of {@code info}. Puts are
     * buffered and written in batches of 100 rather than one at a time.
     *
     * <p>The {@link Admin}, the {@link Table} and the file reader are opened with
     * try-with-resources so they are released even when reading the file or writing to
     * HBase fails.
     *
     * @throws IOException if the file cannot be read or an HBase call fails
     */
    @Test
    public void PutStu() throws IOException {
        TableName stu = TableName.valueOf("stu");

        // Create the table on first use; the admin handle is only needed for this step.
        try (Admin admin = conn.getAdmin()) {
            if (!admin.tableExists(stu)) {
                admin.createTable(new HTableDescriptor(stu).addFamily(new HColumnDescriptor("info")));
            }
        }

        // NOTE(review): FileReader and String.getBytes() both use the platform default
        // charset here - confirm it matches the encoding of students.txt.
        try (Table stuTable = conn.getTable(stu);
             BufferedReader br = new BufferedReader(new FileReader("data/students.txt"))) {
            ArrayList<Put> puts = new ArrayList<>();
            String line;
            while ((line = br.readLine()) != null) {
                String[] split = line.split(",");
                String id = split[0];
                String name = split[1];
                String age = split[2];
                String gender = split[3];
                String clazz = split[4];

                Put put = new Put(id.getBytes());
                put.addColumn("info".getBytes(), "name".getBytes(), name.getBytes());
                put.addColumn("info".getBytes(), "age".getBytes(), age.getBytes());
                put.addColumn("info".getBytes(), "gender".getBytes(), gender.getBytes());
                put.addColumn("info".getBytes(), "clazz".getBytes(), clazz.getBytes());

                // Batch insert: flush once 100 puts have accumulated.
                puts.add(put);
                if (puts.size() == 100) {
                    stuTable.put(puts);
                    puts.clear();
                }
            }
            // Write whatever is left over from the last, partially filled batch.
            if (!puts.isEmpty()) {
                stuTable.put(puts);
            }
        }
    }

代码封装

定义全局变量

    // Fields shared by all examples.
    // Connection to HBase; assigned in init() and closed in close().
    Connection conn;
    // Handle to the "stu" table; assigned in init().
    Table stu;

定义全局方法 执行扫描操作

    /**
     * Opens a scanner over the {@code stu} table for a scan restricted only by the given
     * filter.
     *
     * @param filter the filter to attach to the scan
     * @return the scanner the table returns for that scan
     * @throws IOException if the table fails to open the scanner
     */
    public ResultScanner getScannerWithFilter(Filter filter) throws IOException {
        return stu.getScanner(new Scan().setFilter(filter));
    }

遍历扫描的数据 (所有行)

    /**
     * Scans the {@code stu} table with the given filter and prints each returned row as
     * {@code rowkey,name,age,gender,clazz}.
     *
     * <p>The scanner is opened in a try-with-resources block so it is closed once the
     * iteration finishes or fails, instead of being left open.
     *
     * @param filter the filter to apply to the scan
     * @throws IOException if the scan cannot be opened
     */
    public void printScanner(Filter filter) throws IOException {
        // Encode the family and qualifiers once instead of once per row.
        byte[] family = Bytes.toBytes("info");
        byte[] nameCol = Bytes.toBytes("name");
        byte[] ageCol = Bytes.toBytes("age");
        byte[] genderCol = Bytes.toBytes("gender");
        byte[] clazzCol = Bytes.toBytes("clazz");

        try (ResultScanner scanner = getScannerWithFilter(filter)) {
            for (Result rs : scanner) {
                String rk = Bytes.toString(rs.getRow());
                String name = Bytes.toString(rs.getValue(family, nameCol));
                String age = Bytes.toString(rs.getValue(family, ageCol));
                String gender = Bytes.toString(rs.getValue(family, genderCol));
                String clazz = Bytes.toString(rs.getValue(family, clazzCol));
                System.out.println(rk + "," + name + "," + age + "," + gender + "," + clazz);
            }
        }
    }

使用CellUtil进行打印

    /**
     * Scans the {@code stu} table with the given filter and prints one
     * {@code rowkey,value} line per returned cell, using {@link CellUtil} to copy the row
     * key and value out of each cell.
     *
     * <p>The scanner is opened in a try-with-resources block so it is closed once the
     * iteration finishes or fails, instead of being left open.
     *
     * @param filter the filter to apply to the scan
     * @throws IOException if the scan cannot be opened
     */
    public void printScannerWithCellUtil(Filter filter) throws IOException {
        try (ResultScanner scanner = getScannerWithFilter(filter)) {
            for (Result rs : scanner) {
                for (Cell cell : rs.listCells()) {
                    String rowkey = Bytes.toString(CellUtil.cloneRow(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(rowkey + "," + value);
                }
            }
        }
    }

建立连接

    /**
     * Runs before each test: builds an HBase configuration that points at the ZooKeeper
     * quorum, opens the connection and obtains the {@code stu} table handle.
     *
     * @throws IOException if the connection or the table handle cannot be created
     */
    @Before
    public void init() throws IOException {
        Configuration hbaseConf = HBaseConfiguration.create();
        // Address list of the ZooKeeper ensemble.
        hbaseConf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");

        this.conn = ConnectionFactory.createConnection(hbaseConf);
        this.stu = this.conn.getTable(TableName.valueOf("stu"));
    }

关闭连接

    /**
     * Runs after each test: closes the {@code stu} table handle and then the connection.
     *
     * <p>Both fields are null-checked because JUnit still runs {@code @After} methods when
     * {@code init()} failed part-way, and a {@code NullPointerException} here would hide
     * the original failure. The connection is closed in a {@code finally} block so it is
     * released even if closing the table throws.
     *
     * @throws IOException if closing the table or the connection fails
     */
    @After
    public void close() throws IOException {
        try {
            if (stu != null) {
                stu.close();
            }
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }

过滤器

过滤出Rowkey(id)中,包含8的学生信息

    /**
     * Prints every student whose row key (id) contains the character {@code 8}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void RowFileterWithSubString() throws IOException {
        printScanner(new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("8")));
    }

运行结果:


过滤出stu表列簇名为info下的所有列的数据

@Test
    // Prints the row key and value of every cell in the info column family of stu.
    // The shared stu table handle and the printScannerWithCellUtil helper are reused
    // here; the previous version opened a second Table for the same "stu" table that
    // was never closed and repeated the helper's printing loop inline.
    public void FamilyFilterWithCom() throws IOException {
        FamilyFilter familyFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator("info".getBytes()));
        printScannerWithCellUtil(familyFilter);
    }

运行结果:


过滤出stu表中列名包含a的所有列的数据,使用正则表达式

    /**
     * Prints the cells of every column whose qualifier contains the letter {@code a},
     * matched with a regular expression.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void QualifierFilterWithRegex() throws IOException {
        RegexStringComparator containsA = new RegexStringComparator(".*a.*");
        printScannerWithCellUtil(new QualifierFilter(CompareFilter.CompareOp.EQUAL, containsA));
    }

运行结果:


过滤出 数据中包含 文 的所有数据

    /**
     * Prints every cell whose value contains the substring 文.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void ValueFilterWithSubString() throws IOException {
        SubstringComparator containsWen = new SubstringComparator("文");
        printScannerWithCellUtil(new ValueFilter(CompareFilter.CompareOp.EQUAL, containsWen));
    }

运行结果:


过滤出 数据中包含 文 的所有数据

    /**
     * Prints every cell whose value contains the substring 文.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void ValueFilterWithSubString() throws IOException {
        SubstringComparator containsWen = new SubstringComparator("文");
        printScannerWithCellUtil(new ValueFilter(CompareFilter.CompareOp.EQUAL, containsWen));
    }

运行结果:


过滤出班级是 文科班 的学生的所有信息

    /**
     * Prints all columns of every student whose {@code info:clazz} value starts with 文科.
     *
     * <p>The family, qualifier and prefix are encoded with {@link Bytes#toBytes(String)},
     * which always uses UTF-8. {@code String.getBytes()} depends on the platform default
     * charset, so the non-ASCII prefix could be encoded differently from the UTF-8 that
     * {@code printScanner} assumes when it decodes values with {@code Bytes.toString}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueFilterWithBinaryPrefix() throws IOException {
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                Bytes.toBytes("info"),
                Bytes.toBytes("clazz"),
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        printScanner(singleColumnValueFilter);
    }

运行结果:


过滤出班级是 文科班 的学生的所有信息,最终结果没有 clazz 列

    /**
     * Prints every student whose {@code info:clazz} value starts with 文科, using the
     * exclude variant of the filter so that the tested {@code clazz} column itself is
     * left out of the returned rows.
     *
     * <p>The family, qualifier and prefix are encoded with {@link Bytes#toBytes(String)},
     * which always uses UTF-8. {@code String.getBytes()} depends on the platform default
     * charset, so the non-ASCII prefix could be encoded differently from the UTF-8 that
     * {@code printScanner} assumes when it decodes values with {@code Bytes.toString}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueExcludeFilterWithBinaryPrefix() throws IOException {
        SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter(
                Bytes.toBytes("info"),
                Bytes.toBytes("clazz"),
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        printScanner(singleColumnValueExcludeFilter);
    }

运行结果:


过滤出年龄是 奇数 的学生的所有信息

    /**
     * Prints all columns of every student whose {@code info:age} value is an odd number.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueFilterWithRegex() throws IOException {
        // At most one leading digit followed by an odd final digit.
        RegexStringComparator oddNumber = new RegexStringComparator("^[0-9]{0,1}[13579]$");
        SingleColumnValueFilter oddAgeFilter = new SingleColumnValueFilter(
                "info".getBytes(),
                "age".getBytes(),
                CompareFilter.CompareOp.EQUAL,
                oddNumber);
        printScanner(oddAgeFilter);
    }

运行结果:


查询rowkey以150010008为前缀的所有数据

    /**
     * Prints every student whose row key starts with {@code 150010008}, once with each of
     * two equivalent filters, separated by a divider line.
     *
     * @throws IOException if either scan fails
     */
    @Test
    public void PrefixFilter() throws IOException {
        byte[] rowPrefix = "150010008".getBytes();

        // Approach 1: the dedicated prefix filter.
        printScanner(new PrefixFilter(rowPrefix));

        System.out.println("==================");

        // Approach 2: a row filter combined with a binary prefix comparator.
        printScanner(new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator(rowPrefix)));
    }

运行结果:


多过滤器综合查询

过滤出 理科班 中的 女生 年龄为奇数 的所有信息

    /**
     * Combined query: prints the students who are in a science class (clazz starts with
     * 理科) AND are female (gender equals 女) AND have an odd age.
     *
     * <p>The three conditions are joined with {@code MUST_PASS_ALL} (logical AND). The
     * previous version used {@code MUST_PASS_ONE} (logical OR), which returned every row
     * satisfying any single condition instead of the intersection described above.
     *
     * <p>Byte arrays are built with {@link Bytes#toBytes(String)} (always UTF-8) so the
     * non-ASCII literals do not depend on the platform default charset.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void ComnineFilter() throws IOException {
        byte[] family = Bytes.toBytes("info");

        // clazz starts with 理科
        SingleColumnValueFilter filter1 = new SingleColumnValueFilter(
                family,
                Bytes.toBytes("clazz"),
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("理科")));

        // gender is exactly 女
        SingleColumnValueFilter filter2 = new SingleColumnValueFilter(
                family,
                Bytes.toBytes("gender"),
                CompareFilter.CompareOp.EQUAL,
                Bytes.toBytes("女"));

        // age is a one- or two-digit odd number
        SingleColumnValueFilter filter3 = new SingleColumnValueFilter(
                family,
                Bytes.toBytes("age"),
                CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator("^[0-9]{0,1}[13579]$"));

        /*
         * MUST_PASS_ALL ===>  and
         * MUST_PASS_ONE ===>  or
         */
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        filterList.addFilter(filter1);
        filterList.addFilter(filter2);
        filterList.addFilter(filter3);

        printScanner(filterList);
    }

运行结果:


完整代码

package com.liangzai.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;

/**
 * JUnit examples of HBase scan filters, run against the {@code stu} student table.
 *
 * <p>Each test builds a filter, scans the table with it and prints the matching data to
 * standard output. A connection and a table handle are opened before every test and
 * closed afterwards.
 *
 * <p>Changes compared with the previous version:
 * <ul>
 *   <li>every {@link ResultScanner} is closed after use;</li>
 *   <li>the {@code stu} table handle is closed together with the connection;</li>
 *   <li>{@code FamilyFilterWithCom} no longer opens a second, never-closed table;</li>
 *   <li>{@code ComnineFilter} combines its conditions with AND, as its description
 *       requires, instead of OR;</li>
 *   <li>byte arrays are built with {@link Bytes#toBytes(String)} (UTF-8) instead of the
 *       platform-charset-dependent {@code String.getBytes()}.</li>
 * </ul>
 */
public class Demo04Filter {
    /** Column family that holds all student attributes. */
    private static final byte[] INFO = Bytes.toBytes("info");
    private static final byte[] NAME = Bytes.toBytes("name");
    private static final byte[] AGE = Bytes.toBytes("age");
    private static final byte[] GENDER = Bytes.toBytes("gender");
    private static final byte[] CLAZZ = Bytes.toBytes("clazz");

    /** Matches at most one leading digit followed by an odd final digit. */
    private static final String ODD_NUMBER_REGEX = "^[0-9]{0,1}[13579]$";

    // Connection to HBase; opened in init() and closed in close().
    Connection conn;
    // Handle to the stu table; opened in init() and closed in close().
    Table stu;

    /**
     * Opens a scanner over the {@code stu} table restricted only by the given filter.
     * The caller is responsible for closing the returned scanner.
     *
     * @param filter the filter to attach to the scan
     * @return the scanner the table returns for that scan
     * @throws IOException if the table fails to open the scanner
     */
    public ResultScanner getScannerWithFilter(Filter filter) throws IOException {
        Scan scan = new Scan();
        scan.setFilter(filter);
        return stu.getScanner(scan);
    }

    /**
     * Scans with the given filter and prints each row as
     * {@code rowkey,name,age,gender,clazz}.
     *
     * @param filter the filter to apply to the scan
     * @throws IOException if the scan cannot be opened
     */
    public void printScanner(Filter filter) throws IOException {
        try (ResultScanner scanner = getScannerWithFilter(filter)) {
            for (Result rs : scanner) {
                String rk = Bytes.toString(rs.getRow());
                String name = Bytes.toString(rs.getValue(INFO, NAME));
                String age = Bytes.toString(rs.getValue(INFO, AGE));
                String gender = Bytes.toString(rs.getValue(INFO, GENDER));
                String clazz = Bytes.toString(rs.getValue(INFO, CLAZZ));
                System.out.println(rk + "," + name + "," + age + "," + gender + "," + clazz);
            }
        }
    }

    /**
     * Scans with the given filter and prints one {@code rowkey,value} line per returned
     * cell, using {@link CellUtil} to copy the row key and value out of each cell.
     *
     * @param filter the filter to apply to the scan
     * @throws IOException if the scan cannot be opened
     */
    public void printScannerWithCellUtil(Filter filter) throws IOException {
        try (ResultScanner scanner = getScannerWithFilter(filter)) {
            for (Result rs : scanner) {
                for (Cell cell : rs.listCells()) {
                    String rowkey = Bytes.toString(CellUtil.cloneRow(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(rowkey + "," + value);
                }
            }
        }
    }

    /**
     * Runs before each test: points the configuration at the ZooKeeper quorum, opens the
     * connection and obtains the {@code stu} table handle.
     *
     * @throws IOException if the connection or the table handle cannot be created
     */
    @Before
    public void init() throws IOException {
        Configuration conf = HBaseConfiguration.create();
        // Address list of the ZooKeeper ensemble.
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");
        conn = ConnectionFactory.createConnection(conf);
        stu = conn.getTable(TableName.valueOf("stu"));
    }

    /** Prints every student whose row key (id) contains the character 8. */
    @Test
    public void RowFileterWithSubString() throws IOException {
        SubstringComparator comparator = new SubstringComparator("8");
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, comparator);
        printScanner(rowFilter);
    }

    /**
     * Prints the row key and value of every cell in the {@code info} column family.
     * Uses the shared table handle and printing helper rather than opening another table.
     */
    @Test
    public void FamilyFilterWithCom() throws IOException {
        FamilyFilter familyFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(INFO));
        printScannerWithCellUtil(familyFilter);
    }

    /** Prints the cells of every column whose qualifier contains the letter a (regex match). */
    @Test
    public void QualifierFilterWithRegex() throws IOException {
        QualifierFilter qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*a.*"));
        printScannerWithCellUtil(qualifierFilter);
    }

    /** Prints every cell whose value contains the substring 文. */
    @Test
    public void ValueFilterWithSubString() throws IOException {
        ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("文"));
        printScannerWithCellUtil(valueFilter);
    }

    /** Prints all columns of every student whose clazz value starts with 文科. */
    @Test
    public void SingleColumnValueFilterWithBinaryPrefix() throws IOException {
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                INFO,
                CLAZZ,
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        printScanner(singleColumnValueFilter);
    }

    /**
     * Prints every student whose clazz value starts with 文科, using the exclude variant
     * of the filter so that the tested clazz column itself is left out of the result.
     */
    @Test
    public void SingleColumnValueExcludeFilterWithBinaryPrefix() throws IOException {
        SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter(
                INFO,
                CLAZZ,
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        printScanner(singleColumnValueExcludeFilter);
    }

    /** Prints all columns of every student whose age is an odd number. */
    @Test
    public void SingleColumnValueFilterWithRegex() throws IOException {
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                INFO,
                AGE,
                CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(ODD_NUMBER_REGEX));
        printScanner(singleColumnValueFilter);
    }

    /**
     * Prints every student whose row key starts with 150010008, once with each of two
     * equivalent filters, separated by a divider line.
     */
    @Test
    public void PrefixFilter() throws IOException {
        byte[] rowPrefix = Bytes.toBytes("150010008");

        // Approach 1: the dedicated prefix filter.
        PrefixFilter prefixFilter = new PrefixFilter(rowPrefix);
        printScanner(prefixFilter);

        System.out.println("==================");

        // Approach 2: a row filter combined with a binary prefix comparator.
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator(rowPrefix));
        printScanner(rowFilter);
    }

    /**
     * Combined query: prints the students who are in a science class (clazz starts with
     * 理科) AND are female (gender equals 女) AND have an odd age.
     */
    @Test
    public void ComnineFilter() throws IOException {
        // clazz starts with 理科
        SingleColumnValueFilter filter1 = new SingleColumnValueFilter(
                INFO,
                CLAZZ,
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("理科")));

        // gender is exactly 女
        SingleColumnValueFilter filter2 = new SingleColumnValueFilter(
                INFO,
                GENDER,
                CompareFilter.CompareOp.EQUAL,
                Bytes.toBytes("女"));

        // age is a one- or two-digit odd number
        SingleColumnValueFilter filter3 = new SingleColumnValueFilter(
                INFO,
                AGE,
                CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(ODD_NUMBER_REGEX));

        /*
         * MUST_PASS_ALL ===>  and
         * MUST_PASS_ONE ===>  or
         *
         * All three conditions must hold, so the list uses MUST_PASS_ALL.
         */
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        filterList.addFilter(filter1);
        filterList.addFilter(filter2);
        filterList.addFilter(filter3);

        printScanner(filterList);
    }

    /**
     * Runs after each test: closes the table handle and then the connection.
     *
     * <p>Both fields are null-checked because JUnit still runs {@code @After} methods when
     * {@code init()} failed part-way. The connection is closed in a {@code finally} block
     * so it is released even if closing the table throws.
     *
     * @throws IOException if closing the table or the connection fails
     */
    @After
    public void close() throws IOException {
        try {
            if (stu != null) {
                stu.close();
            }
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }
}

到底啦!关注靓仔学习更多的大数据知识!😊

  • 4
    点赞
  • 11
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

liangzai2048

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值