简要介绍
HBase中的过滤器类似于SQL中的where条件。过滤器可以在HBase中的多个维度上进行数据的筛选操作。过滤器筛选的数据能够细化到具体的一个存储单元格上(行键、列族、列限定符)。
过滤器的参数
过滤器中至少需要使用两类参数:抽象的运算符和比较器
抽象的运算符:
运算符 | 含义 |
---|---|
LESS | 小于 |
LESS_OR_EQUAL | 小于等于 |
EQUAL | 等于 |
NOT_EQUAL | 不等于 |
GREATER | 大于 |
GREATER_OR_EQUAL | 大于等于 |
NO_OP | 无操作 |
比较器
用于处理具体的比较逻辑。例如:字节级的比较、字符串级的比较等
比较器 | 含义 |
---|---|
BinaryComparator | 二进制比较器。用于按字典顺序比较字节数组(byte[])的值,采用Bytes.compareTo(byte[])进行比较 |
BinaryPrefixComparator | 前缀二进制比较器。与二进制比较器不同的是:只比较前缀是否相同 |
NullComparator | 空值比较器。判断给定的值是否为空 |
RegexStringComparator | 正则比较器,仅支持EQUAL和NOT_EQUAL |
SubstringComparator | 字符串包含比较器。用于检测一个字符串是否存在于值中,并且不区分大小写,仅支持EQUAL和NOT_EQUAL |
过滤案例
行键过滤器
package Filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Row-key filter example: scans table {@code t1} and keeps only rows whose
 * key compares byte-wise equal to {@code "row1"}.
 */
public class Rowkey {
    /**
     * Connects to the cluster, runs the filtered scan and releases every
     * client resource before returning.
     *
     * @param args unused
     * @throws IOException if connecting, opening the table, or scanning fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","192.168.1.100:2181," +
                "192.168.1.101:2181,192.168.1.102:2181");

        // Build a scan over the table and attach the row-key filter.
        Scan scan = new Scan();
        RowFilter filter = new RowFilter(CompareOperator.EQUAL, new BinaryComparator(Bytes.toBytes("row1")));
        scan.setFilter(filter);

        // Connection, Table and ResultScanner are all Closeable; try-with-resources
        // closes them in reverse order even when the scan throws.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table t1 = connection.getTable(TableName.valueOf("t1"));
             ResultScanner scanner = t1.getScanner(scan)) {
            // In production, each result would be converted to a map, collected
            // into a list, and finally returned to the front end as JSON.
        }
    }
}
列族过滤器
```java
package Filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.RegionTooBusyException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.FamilyFilter;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Column-family filter example: scans table {@code t1} and keeps only cells
 * whose column family compares byte-wise equal to {@code "f1"}.
 */
public class Family {
    /**
     * Connects to the cluster, runs the filtered scan and releases every
     * client resource before returning.
     *
     * @param args unused
     * @throws IOException if connecting, opening the table, or scanning fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","192.168.1.100:2181," +
                "192.168.1.101:2181,192.168.1.102:2181");

        Scan scan = new Scan();
        FamilyFilter f1 = new FamilyFilter(CompareOperator.EQUAL, new BinaryComparator(Bytes.toBytes("f1")));
        scan.setFilter(f1);

        // Connection, Table and ResultScanner are all Closeable; try-with-resources
        // closes them in reverse order even when the scan throws.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table t1 = connection.getTable(TableName.valueOf("t1"));
             ResultScanner scanner = t1.getScanner(scan)) {
            // Results would be consumed here by iterating over the scanner.
        }
    }
}
列过滤器
package Filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Column-qualifier filter example: scans table {@code t1} and keeps only
 * cells whose qualifier compares byte-wise equal to {@code "name"}.
 */
public class Qualifier {
    /**
     * Connects to the cluster, runs the filtered scan and releases every
     * client resource before returning.
     *
     * @param args unused
     * @throws IOException if connecting, opening the table, or scanning fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","192.168.1.100:2181," +
                "192.168.1.101:2181,192.168.1.102:2181");

        Scan scan = new Scan();
        QualifierFilter name = new QualifierFilter(CompareOperator.EQUAL, new BinaryComparator(Bytes.toBytes("name")));
        scan.setFilter(name);

        // Connection, Table and ResultScanner are all Closeable; try-with-resources
        // closes them in reverse order even when the scan throws.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table t1 = connection.getTable(TableName.valueOf("t1"));
             ResultScanner res = t1.getScanner(scan)) {
            // Results would be consumed here by iterating over the scanner.
        }
    }
}
值过滤器
package Filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.ValueFilter;
import java.io.IOException;
/**
 * Value filter example: scans table {@code t1} and keeps only cells whose
 * value contains the substring {@code "xiaoming"}.
 */
public class Value {
    /**
     * Connects to the cluster, runs the filtered scan and releases every
     * client resource before returning.
     *
     * @param args unused
     * @throws IOException if connecting, opening the table, or scanning fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","192.168.1.100:2181," +
                "192.168.1.101:2181,192.168.1.102:2181");

        Scan scan = new Scan();
        // EQUAL combined with a substring comparator means "value contains the text".
        ValueFilter valueFilter = new ValueFilter(CompareOperator.EQUAL, new SubstringComparator("xiaoming"));
        scan.setFilter(valueFilter);

        // Connection, Table and ResultScanner are all Closeable; try-with-resources
        // closes them in reverse order even when the scan throws.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table t1 = connection.getTable(TableName.valueOf("t1"));
             ResultScanner res = t1.getScanner(scan)) {
            // Results would be consumed here by iterating over the scanner.
        }
    }
}
单列值过滤器
package Filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Single-column value filter example: scans table {@code t1} and keeps rows
 * whose {@code f1:name} value does not contain the substring {@code "xiaoming"}.
 */
public class SingleColumnValue {
    /**
     * Connects to the cluster, runs the filtered scan and releases every
     * client resource before returning.
     *
     * @param args unused
     * @throws IOException if connecting, opening the table, or scanning fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","192.168.1.100:2181," +
                "192.168.1.101:2181,192.168.1.102:2181");

        Scan scan = new Scan();
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                Bytes.toBytes("f1"), Bytes.toBytes("name"),
                CompareOperator.NOT_EQUAL, new SubstringComparator("xiaoming"));
        // With true, rows that lack the name column are filtered out;
        // with false, such rows are not filtered.
        singleColumnValueFilter.setFilterIfMissing(true);
        scan.setFilter(singleColumnValueFilter);

        // Connection, Table and ResultScanner are all Closeable; try-with-resources
        // closes them in reverse order even when the scan throws.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table t1 = connection.getTable(TableName.valueOf("t1"));
             ResultScanner res = t1.getScanner(scan)) {
            // Results would be consumed here by iterating over the scanner.
        }
    }
}
多条件过滤器
package Filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class MulConditionFilter {
public static void main(String[] args) throws IOException {
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "192.168.1.100:2181," +
"192.168.1.101:2181,192.168.1.102:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table t1 = connection.getTable(TableName.valueOf("t1"));
Scan scan = new Scan();
SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(Bytes.toBytes("f1"), Bytes.toBytes("name"),
CompareOperator.NOT_EQUAL, new SubstringComparator("xiaoming"));
// 如果name列不存在,则被过滤掉,设置为false,则不过滤
singleColumnValueFilter.setFilterIfMissing(true);
FamilyFilter familyFilter = new FamilyFilter(CompareOperator.EQUAL, new BinaryComparator(Bytes.toBytes("f1")));
// 创建过滤器集合对象
FilterList filterList = new FilterList();
// 添加多个过滤器到过滤器集合对象中
filterList.addFilter(singleColumnValueFilter);
filterList.addFilter(familyFilter);
// 设置查询过滤器
scan.setFilter(filterList);
}
}