HBase 的 Java API 操作之过滤器(Filter)
过滤器:相当于where语句
因为列值过滤器都属于表的操作,属于DML语句,所以我们使用连接(Connection)来获取表,再用表进行操作
而这个过滤器最终是设置到Scan对象中进行操作的,相当于select * from emp where ...
测试数据:员工表
在讲filter的之前我们先看一下关于filter的继承情况
Filter下边有两个子类:一个是FilterBase,一个是FilterWrapper
我们主要是看FilterBase
这个是所有FilterBase下的过滤器
我们再看一下scan都能做些什么
hbase(main):008:0> help "scan"
Scan a table; pass table name and optionally a dictionary of scanner
specifications. Scanner specifications may include one or more of:
TIMERANGE, FILTER, LIMIT, STARTROW, STOPROW, ROWPREFIXFILTER, TIMESTAMP,
MAXLENGTH, COLUMNS, CACHE, RAW, VERSIONS, ALL_METRICS, METRICS,
REGION_REPLICA_ID, ISOLATION_LEVEL, READ_TYPE, ALLOW_PARTIAL_RESULTS,
BATCH or MAX_RESULT_SIZE
If no columns are specified, all columns will be scanned.
To scan all members of a column family, leave the qualifier empty as in
'col_family'.
The filter can be specified in two ways:
- Using a filterString - more information on this is available in the
Filter Language document attached to the HBASE-4176 JIRA
- Using the entire package name of the filter.
If you wish to see metrics regarding the execution of the scan, the
ALL_METRICS boolean should be set to true. Alternatively, if you would
prefer to see only a subset of the metrics, the METRICS array can be
defined to include the names of only the metrics you care about.
通过查看scan命令,我们可以看到扫描一张表,可以通过表名和可选项scanner字典来指定。
Scanner 的指定选项可以包含一个或者多个:它们分别是时间范围,过滤器,限制条数,起始行,终止
行,行前缀,时间戳,最大长度,列,缓存等等
1、
如果没有列被指定,那么所有的列都被scaned到
如果我们想扫描某个列族下的所有列,那么把列限定符(qualifier)置空、只写列族名就可以了
比如 scan 'hbase:meta', {COLUMNS => 'info'}
2
指定filter有两种方式
一种是使用filterString(过滤器字符串)
一种是使用过滤器的完整包名(全限定类名)
3
关于metrics后续再研究,我也不知道这是个什么东西
其实所有的过滤器都对应的是filter下面的子类
(*)列值过滤器
根据列值作为条件来scan表
(*)列名前缀过滤器
通过列名前缀进行过滤,这个其实不属于where条件,而是决定返回哪些列,即通过列名前缀对返回的列进行过滤
(*)多个列名前缀过滤器
通过多个列名前缀进行过滤
(*)rowkey过滤器:相当于get语句
根据行键进行过滤
(*)组合多个过滤器
通过多个过滤器一起过滤
示例程序
package day014.hbaseApi;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.ColumnValueFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.MultipleColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.P;
import org.junit.Test;
/**
 * JUnit examples showing how the common HBase scan filters behave against the
 * {@code emp} table (column family {@code empinfo}).
 *
 * <p>Every test builds one filter, attaches it to a {@link Scan}, runs the scan and
 * prints the selected columns of each returned row to standard output. Failures
 * while talking to the cluster are printed rather than rethrown, so these tests are
 * demonstrations and do not assert anything.
 */
public class HbaseFilter
{
    /** ZooKeeper quorum address used to locate the HBase cluster. */
    private static final String ZOOKEEPER_QUORUM = "192.168.112.111";

    /** Name of the employee table scanned by every example. */
    private static final String TABLE_NAME = "emp";

    /** Column family that holds all employee columns. */
    private static final byte[] FAMILY = Bytes.toBytes("empinfo");

    /** Qualifier of the salary column. */
    private static final byte[] SAL = Bytes.toBytes("sal");

    /** Qualifier of the employee-name column. */
    private static final byte[] ENAME = Bytes.toBytes("ename");

    /**
     * Column-value filter, comparable to {@code select sal from emp where sal = "3000"}.
     *
     * <p>Uses {@link ColumnValueFilter}, which per the HBase documentation emits only
     * the matching cell. {@link SingleColumnValueFilter} with the same arguments is the
     * alternative when the whole row should be returned.
     */
    @Test
    public void test() {
        Filter filter = new ColumnValueFilter(FAMILY, SAL, CompareOperator.EQUAL, Bytes.toBytes("3000"));
        scanEmp(filter, "工资等于3000的用户是:");
    }

    /**
     * Column-prefix filter: restricts which columns are returned (those whose
     * qualifier starts with {@code sal}); it is not a row condition.
     */
    @Test
    public void testPrefixFilter() {
        Filter filter = new ColumnPrefixFilter(SAL);
        scanEmp(filter, "返回的列有哪些:");
    }

    /**
     * Multiple-column-prefix filter: returns the columns whose qualifier starts with
     * any of the given prefixes (the prefixes are OR-ed, not AND-ed).
     */
    @Test
    public void testMultiPrefixFilter() {
        byte[][] prefixes = { SAL, ENAME };
        Filter filter = new MultipleColumnPrefixFilter(prefixes);
        scanEmp(filter, null);
    }

    /**
     * Row-key filter: selects rows by comparing the row key against a regular
     * expression.
     *
     * <p>NOTE(review): RegexStringComparator appears to perform an unanchored match, so
     * {@code "7839"} may also select row keys that merely contain 7839; anchor the
     * pattern as {@code ^7839$} if an exact, get-like lookup is required. Confirm
     * against the HBase documentation.
     */
    @Test
    public void testRowkeyFilter() {
        Filter filter = new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("7839"));
        scanEmp(filter, null);
    }

    /**
     * Combines several filters. {@code Operator.MUST_PASS_ALL} joins them with AND;
     * {@code Operator.MUST_PASS_ONE} would join them with OR.
     */
    @Test
    public void testMultiFilter() {
        FilterList filterList = new FilterList(Operator.MUST_PASS_ALL);

        // Projection: only return the sal* and ename* columns.
        byte[][] prefixes = { SAL, ENAME };
        filterList.addFilter(new MultipleColumnPrefixFilter(prefixes));

        // Row condition: sal = "3000".
        SingleColumnValueFilter salaryIs3000 =
                new SingleColumnValueFilter(FAMILY, SAL, CompareOperator.EQUAL, Bytes.toBytes("3000"));
        // By default a row that lacks the tested column passes the filter; a "where"
        // condition must reject such rows, so request that explicitly.
        salaryIs3000.setFilterIfMissing(true);
        filterList.addFilter(salaryIs3000);

        scanEmp(filterList, null);
    }

    /**
     * Scans the employee table with the given filter and prints each returned row.
     *
     * <p>The connection, table and scanner are opened in a try-with-resources block so
     * all three are closed in reverse order even when the scan fails, and nothing is
     * closed that was never opened.
     *
     * @param filter      filter to attach to the scan
     * @param salaryLabel when non-null, only the salary column is printed, prefixed
     *                    with this label; when null, both name and salary are printed
     */
    private static void scanEmp(Filter filter, String salaryLabel) {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", ZOOKEEPER_QUORUM);

        Scan scan = new Scan();
        scan.setFilter(filter);

        try (Connection connection = ConnectionFactory.createConnection(conf);
                Table table = connection.getTable(TableName.valueOf(TABLE_NAME));
                ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                printRow(result, salaryLabel);
            }
        } catch (Exception e) {
            // Best-effort demo: report the failure and let the test finish.
            e.printStackTrace();
        }
    }

    /**
     * Prints one row in either the salary-only or the name-and-salary format.
     *
     * @param result      row returned by the scanner
     * @param salaryLabel label for the salary-only format, or null for name and salary
     */
    private static void printRow(Result result, String salaryLabel) {
        byte[] salary = result.getValue(FAMILY, SAL);
        if (salaryLabel != null) {
            System.out.println(salaryLabel + Bytes.toString(salary));
            return;
        }
        byte[] name = result.getValue(FAMILY, ENAME);
        System.out.print("返回名称:" + Bytes.toString(name));
        System.out.println("和工资:" + Bytes.toString(salary));
    }
}