1.1. hbase过滤器
1.1.1. FilterList
FilterList 代表一个过滤器列表,可以添加多个过滤器进行查询,多个过滤器之间的关系有:
与关系(符合所有):FilterList.Operator.MUST_PASS_ALL
或关系(符合任一):FilterList.Operator.MUST_PASS_ONE
使用方法:
FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
Scan s1 = new Scan();
filterList.addFilter(new SingleColumnValueFilter(Bytes.toBytes("f1"), Bytes.toBytes("c1"), CompareOp.EQUAL, Bytes.toBytes("v1")));
filterList.addFilter(new SingleColumnValueFilter(Bytes.toBytes("f1"), Bytes.toBytes("c2"), CompareOp.EQUAL, Bytes.toBytes("v2")));
// 添加下面这一行后,则只返回指定的cell,同一行中的其他cell不返回
s1.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("c1"));
s1.setFilter(filterList); //设置filter
ResultScanner ResultScannerFilterList = table.getScanner(s1); //返回结果列表
1.1.2. 过滤器的种类
过滤器的种类:
列值过滤器—SingleColumnValueFilter
过滤列值的相等、不等、范围等
列名前缀过滤器—ColumnPrefixFilter
过滤指定前缀的列名
多个列名前缀过滤器—MultipleColumnPrefixFilter
过滤多个指定前缀的列名
rowKey过滤器—RowFilter
通过正则,过滤rowKey值。
1.1.3. 列值过滤器—SingleColumnValueFilter
SingleColumnValueFilter 列值判断
相等 (CompareOp.EQUAL ),
不等(CompareOp.NOT_EQUAL),
范围 (e.g., CompareOp.GREATER)…………
下面示例检查列值和字符串'values' 相等...
SingleColumnValueFilter f = new SingleColumnValueFilter(
Bytes.toBytes("cFamily"), Bytes.toBytes("column"), CompareFilter.CompareOp.EQUAL,
Bytes.toBytes("values"));
s1.setFilter(f);
注意:如果某些行中不存在过滤器所指定的列,默认情况下过滤器对这些行不起作用(这些行仍会被返回);若希望排除缺少该列的行,可调用 f.setFilterIfMissing(true)。
1.1.4. 列名前缀过滤器—ColumnPrefixFilter
过滤器—ColumnPrefixFilter
ColumnPrefixFilter 用于过滤列名以指定前缀开头的列
ColumnPrefixFilter f = new ColumnPrefixFilter(Bytes.toBytes("values"));
s1.setFilter(f);
1.1.5. 多个列名前缀过滤器—MultipleColumnPrefixFilter
MultipleColumnPrefixFilter 和 ColumnPrefixFilter 行为差不多,但可以指定多个前缀
byte[][] prefixes = new byte[][] {Bytes.toBytes("value1"),Bytes.toBytes("value2")};
Filter f = new MultipleColumnPrefixFilter(prefixes);
s1.setFilter(f);
1.1.6. rowKey过滤器—RowFilter
RowFilter 是rowkey过滤器
通常根据rowkey来指定范围时,使用scan扫描器的StartRow和StopRow方法比较好。
Filter f = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^1234")); //匹配以1234开头的rowkey
s1.setFilter(f);
package hbase.test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import java.text.DecimalFormat;
import java.util.Iterator;
import java.util.List;
/**
* 测试过滤器
*/
public class TestFilter {
/**
 * Row-key filtering: scans table {@code ns1:t2} with a {@link RowFilter} and prints
 * every returned row via {@code outResult}.
 *
 * <p>The active filter keeps rows whose row key matches the regular expression
 * {@code 88$}, i.e. the equivalent of {@code WHERE rowkey LIKE '%88'}.
 *
 * @throws Exception if creating the connection, opening the table, or scanning fails
 */
@Test
public void testRowFilter() throws Exception {
    Configuration conf = HBaseConfiguration.create();

    // Alternative filters kept for reference:
    // where rowkey <= row088
    // RowFilter filter = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("row088")));
    // where rowkey like '%88%'
    // RowFilter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("88"));

    // where rowkey like '%88'
    RowFilter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("88$"));
    Scan scan = new Scan();
    scan.setFilter(filter);

    // try-with-resources closes the scanner, table and connection (in that order),
    // even when the scan or the printing throws.
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Table t = conn.getTable(TableName.valueOf("ns1:t2"));
         ResultScanner rs = t.getScanner(scan)) {
        for (Result r : rs) {
            outResult(r);
        }
    }
}
/**
 * Column-family filtering: scans table {@code ns1:t2} with a {@link FamilyFilter}
 * and prints every returned row via {@code outResult}.
 *
 * <p>The filter keeps cells whose column family matches the regular expression
 * {@code ^f}, i.e. families whose name starts with "f".
 *
 * @throws Exception if creating the connection, opening the table, or scanning fails
 */
@Test
public void testFamilyFilter() throws Exception {
    Configuration conf = HBaseConfiguration.create();

    // Keep only column families whose name starts with "f".
    FamilyFilter filter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^f"));
    Scan scan = new Scan();
    scan.setFilter(filter);

    // try-with-resources closes the scanner, table and connection (in that order),
    // even when the scan or the printing throws.
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Table t = conn.getTable(TableName.valueOf("ns1:t2"));
         ResultScanner rs = t.getScanner(scan)) {
        for (Result r : rs) {
            outResult(r);
        }
    }
}
/**
 * Column-qualifier filtering: scans table {@code ns1:t2} with a
 * {@link QualifierFilter} and prints every returned row via {@code outResult}.
 *
 * <p>The filter keeps cells whose qualifier matches the regular expression
 * {@code ^name$}, i.e. the qualifier is exactly "name".
 *
 * @throws Exception if creating the connection, opening the table, or scanning fails
 */
@Test
public void testQualifierFilter() throws Exception {
    Configuration conf = HBaseConfiguration.create();

    // Keep only cells whose column qualifier is exactly "name".
    // CompareOp is declared on CompareFilter; reference it there like the other tests do.
    QualifierFilter filter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^name$"));
    Scan scan = new Scan();
    scan.setFilter(filter);

    // try-with-resources closes the scanner, table and connection (in that order),
    // even when the scan or the printing throws.
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Table t = conn.getTable(TableName.valueOf("ns1:t2"));
         ResultScanner rs = t.getScanner(scan)) {
        for (Result r : rs) {
            outResult(r);
        }
    }
}
/**
 * Combined filtering: scans table {@code ns1:t2} with a {@link FilterList} that
 * requires both of its member filters to pass, and prints every returned row via
 * {@code outResult}.
 *
 * <p>Member filters:
 * <ul>
 *   <li>a {@link ValueFilter} matching cell values against the regular expression {@code m8};</li>
 *   <li>a {@link RowFilter} keeping row keys that compare greater than {@code row8}.</li>
 * </ul>
 *
 * @throws Exception if creating the connection, opening the table, or scanning fails
 */
@Test
public void testFilterList() throws Exception {
    Configuration conf = HBaseConfiguration.create();

    // MUST_PASS_ALL : AND
    // MUST_PASS_ONE : OR
    FilterList filter = new FilterList(FilterList.Operator.MUST_PASS_ALL);

    // CompareOp is declared on CompareFilter; reference it there for every filter type.
    ValueFilter f1 = new ValueFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("m8"));
    filter.addFilter(f1);

    RowFilter f2 = new RowFilter(CompareFilter.CompareOp.GREATER, new BinaryComparator(Bytes.toBytes("row8")));
    filter.addFilter(f2);

    Scan scan = new Scan();
    scan.setFilter(filter);

    // try-with-resources closes the scanner, table and connection (in that order),
    // even when the scan or the printing throws.
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Table t = conn.getTable(TableName.valueOf("ns1:t2"));
         ResultScanner rs = t.getScanner(scan)) {
        for (Result r : rs) {
            outResult(r);
        }
    }
}
/**
 * Cell-value filtering: scans table {@code ns1:t2} with a {@link ValueFilter} and
 * prints every returned row via {@code outResult}.
 *
 * <p>The filter keeps cells whose value matches the regular expression {@code m8}.
 *
 * @throws Exception if creating the connection, opening the table, or scanning fails
 */
@Test
public void testValueFilter() throws Exception {
    Configuration conf = HBaseConfiguration.create();

    // Keep only cells whose value matches the regex "m8".
    // CompareOp is declared on CompareFilter; reference it there like the other tests do.
    ValueFilter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("m8"));
    Scan scan = new Scan();
    scan.setFilter(filter);

    // try-with-resources closes the scanner, table and connection (in that order),
    // even when the scan or the printing throws.
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Table t = conn.getTable(TableName.valueOf("ns1:t2"));
         ResultScanner rs = t.getScanner(scan)) {
        for (Result r : rs) {
            outResult(r);
        }
    }
}
/**
 * Prints a separator line followed by one line per cell of the given result, in the
 * form {@code rowkey/family:qualifier/timestamp=value}.
 *
 * @param r the scan result to print; a result without cells prints only the separator
 */
private void outResult(Result r) {
    System.out.println("=========================");
    List<Cell> cells = r.listCells();
    // Result.listCells() returns null (not an empty list) when the result has no cells.
    if (cells == null) {
        return;
    }
    for (Cell cell : cells) {
        String rowkey = Bytes.toString(CellUtil.cloneRow(cell));
        String f = Bytes.toString(CellUtil.cloneFamily(cell));
        String col = Bytes.toString(CellUtil.cloneQualifier(cell));
        long ts = cell.getTimestamp();
        String value = Bytes.toString(CellUtil.cloneValue(cell));
        System.out.println(rowkey + "/" + f + ":" + col + "/" + ts + "=" + value);
    }
}