package com.shujia;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
/**
 * JUnit-driven demonstrations of HBase comparison filters, run against the {@code stu} table.
 *
 * <p>Every test builds a single filter, scans the table with it and prints the matching data to
 * standard output. A connection is opened before each test and released afterwards.
 */
public class Demo04Filter {
    // Column coordinates read by printScanner and the single-column filters.
    // Bytes.toBytes always encodes as UTF-8, unlike String.getBytes(), which depends on the
    // JVM's default charset and would corrupt the non-ASCII literals used below.
    private static final byte[] INFO = Bytes.toBytes("info");
    private static final byte[] NAME = Bytes.toBytes("name");
    private static final byte[] AGE = Bytes.toBytes("age");
    private static final byte[] GENDER = Bytes.toBytes("gender");
    private static final byte[] CLAZZ = Bytes.toBytes("clazz");

    Connection conn;
    Admin admin;
    TableName stuName;
    Table stu;

    /**
     * Starts a scan of the {@code stu} table restricted by the given filter.
     *
     * @param filter the filter to attach to the scan
     * @return the scanner; the caller is responsible for closing it
     * @throws IOException if the scan cannot be opened
     */
    public ResultScanner getScanner(Filter filter) throws IOException {
        Scan scan = new Scan();
        scan.setFilter(filter);
        return stu.getScanner(scan);
    }

    /**
     * Prints every row of the scanner as {@code id,name,age,gender,clazz}.
     *
     * <p>Intended for results whose rows all share the same set of {@code info} columns. A column
     * that is missing from a row is printed as {@code null}. The scanner is not closed here.
     *
     * @param rss the scanner to drain
     */
    public void printScanner(ResultScanner rss) {
        for (Result rs : rss) {
            String id = Bytes.toString(rs.getRow());
            String name = Bytes.toString(rs.getValue(INFO, NAME));
            String age = Bytes.toString(rs.getValue(INFO, AGE));
            String gender = Bytes.toString(rs.getValue(INFO, GENDER));
            String clazz = Bytes.toString(rs.getValue(INFO, CLAZZ));
            System.out.println(id + "," + name + "," + age + "," + gender + "," + clazz);
        }
    }

    /**
     * Prints every cell of the scanner as {@code rowKey,family,qualifier,value}, one cell per line.
     *
     * <p>Intended for results whose rows do not share a fixed set of columns. The scanner is not
     * closed here.
     *
     * @param rss the scanner to drain
     */
    public void printScannerWithCellUnit(ResultScanner rss) {
        for (Result rs : rss) {
            // listCells() returns null for an empty Result; skip it instead of failing.
            if (rs.isEmpty()) {
                continue;
            }
            String rk = Bytes.toString(rs.getRow());
            for (Cell cell : rs.listCells()) {
                String cf = Bytes.toString(CellUtil.cloneFamily(cell));
                String qua = Bytes.toString(CellUtil.cloneQualifier(cell));
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                System.out.println(rk + "," + cf + "," + qua + "," + value);
            }
        }
    }

    /** Scans with the filter, prints the rows in fixed-column form and always closes the scanner. */
    private void scanAndPrintRows(Filter filter) throws IOException {
        try (ResultScanner scanner = getScanner(filter)) {
            printScanner(scanner);
        }
    }

    /** Scans with the filter, prints the result cell by cell and always closes the scanner. */
    private void scanAndPrintCells(Filter filter) throws IOException {
        try (ResultScanner scanner = getScanner(filter)) {
            printScannerWithCellUnit(scanner);
        }
    }

    /**
     * Opens the HBase connection and the handles used by the tests.
     *
     * @throws IOException if the connection, admin or table handle cannot be obtained
     */
    @Before
    public void init() throws IOException {
        // Create the HBase configuration.
        Configuration conf = HBaseConfiguration.create();
        // Address of the ZooKeeper quorum the HBase cluster belongs to.
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");
        // Establish the connection.
        conn = ConnectionFactory.createConnection(conf);
        // Admin handle for table-level operations that do not touch table data.
        admin = conn.getAdmin();
        stuName = TableName.valueOf("stu");
        stu = conn.getTable(stuName);
    }

    /**
     * Row-key filter: prints every row whose key sorts before {@code 1500100010}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void RowFilterWithBinaryComparator() throws IOException {
        // An ordering comparison (LESS) is involved, so a binary comparator is required.
        BinaryComparator binaryComparator = new BinaryComparator(Bytes.toBytes("1500100010"));
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.LESS, binaryComparator);
        scanAndPrintRows(rowFilter);
    }

    /**
     * Column-family filter: prints all cells of every family whose name contains {@code "f"}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void FamilyFilterWithSubstringComparator() throws IOException {
        SubstringComparator substringComparator = new SubstringComparator("f");
        FamilyFilter familyFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, substringComparator);
        scanAndPrintCells(familyFilter);
    }

    /**
     * Qualifier filter: prints all cells whose column name matches the regular expression
     * {@code .*[mld].*}, i.e. contains {@code m}, {@code l} or {@code d}.
     *
     * <p>Despite the method name, the comparator used here is a {@code RegexStringComparator}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void QualifierFilterWithSubstringComparator() throws IOException {
        RegexStringComparator regexStringComparator = new RegexStringComparator(".*[mld].*");
        QualifierFilter qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, regexStringComparator);
        scanAndPrintCells(qualifierFilter);
    }

    /**
     * Value filter: prints every cell whose value starts with "张".
     *
     * <p>The filter is not tied to a particular column; it is applied to the value of every cell.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void ValueFilterWithBinaryPrefixComparator() throws IOException {
        BinaryPrefixComparator binaryPrefixComparator = new BinaryPrefixComparator(Bytes.toBytes("张"));
        ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, binaryPrefixComparator);
        scanAndPrintCells(valueFilter);
    }

    /**
     * Single-column value filter: prints the complete row of every student whose
     * {@code info:clazz} value starts with "文科".
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueFilter() throws IOException {
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                INFO,
                CLAZZ,
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        scanAndPrintRows(singleColumnValueFilter);
    }

    /**
     * Single-column value exclude filter: same selection as {@link #SingleColumnValueFilter()},
     * but the compared column ({@code info:clazz}) is left out of the returned rows, so it is
     * printed as {@code null}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueExcludeFilter() throws IOException {
        SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter(
                INFO,
                CLAZZ,
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        scanAndPrintRows(singleColumnValueExcludeFilter);
    }

    /**
     * Releases the table, admin and connection handles.
     *
     * <p>Each handle is null-checked because JUnit still runs this method when {@link #init()}
     * failed part-way, and the nested {@code finally} blocks make sure a failure while closing
     * one handle does not prevent the remaining ones from being closed.
     *
     * @throws IOException if closing any handle fails
     */
    @After
    public void close() throws IOException {
        try {
            if (stu != null) {
                stu.close();
            }
        } finally {
            try {
                if (admin != null) {
                    admin.close();
                }
            } finally {
                if (conn != null) {
                    conn.close();
                }
            }
        }
    }
}
package com.shujia;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
/**
 * JUnit-driven demonstrations of HBase filters and paging, run against the {@code stu} table.
 *
 * <p>Every test builds a filter or a row range, scans the table with it and prints the matching
 * data to standard output. A connection is opened before each test and released afterwards.
 */
public class Demo04Filter {
    // Column coordinates read by printScanner and the single-column filters.
    // Bytes.toBytes always encodes as UTF-8, unlike String.getBytes(), which depends on the
    // JVM's default charset and would corrupt the non-ASCII literals used below.
    private static final byte[] INFO = Bytes.toBytes("info");
    private static final byte[] NAME = Bytes.toBytes("name");
    private static final byte[] AGE = Bytes.toBytes("age");
    private static final byte[] GENDER = Bytes.toBytes("gender");
    private static final byte[] CLAZZ = Bytes.toBytes("clazz");

    Connection conn;
    Admin admin;
    TableName stuName;
    Table stu;

    /**
     * Starts a scan of the {@code stu} table restricted by the given filter.
     *
     * @param filter the filter to attach to the scan
     * @return the scanner; the caller is responsible for closing it
     * @throws IOException if the scan cannot be opened
     */
    public ResultScanner getScanner(Filter filter) throws IOException {
        Scan scan = new Scan();
        scan.setFilter(filter);
        return stu.getScanner(scan);
    }

    /**
     * Prints every row of the scanner as {@code id,name,age,gender,clazz}.
     *
     * <p>Intended for results whose rows all share the same set of {@code info} columns. A column
     * that is missing from a row is printed as {@code null}. The scanner is not closed here.
     *
     * @param rss the scanner to drain
     */
    public void printScanner(ResultScanner rss) {
        for (Result rs : rss) {
            String id = Bytes.toString(rs.getRow());
            String name = Bytes.toString(rs.getValue(INFO, NAME));
            String age = Bytes.toString(rs.getValue(INFO, AGE));
            String gender = Bytes.toString(rs.getValue(INFO, GENDER));
            String clazz = Bytes.toString(rs.getValue(INFO, CLAZZ));
            System.out.println(id + "," + name + "," + age + "," + gender + "," + clazz);
        }
    }

    /**
     * Prints every cell of the scanner as {@code rowKey,family,qualifier,value}, one cell per line.
     *
     * <p>Intended for results whose rows do not share a fixed set of columns. The scanner is not
     * closed here.
     *
     * @param rss the scanner to drain
     */
    public void printScannerWithCellUnit(ResultScanner rss) {
        for (Result rs : rss) {
            // listCells() returns null for an empty Result; skip it instead of failing.
            if (rs.isEmpty()) {
                continue;
            }
            String rk = Bytes.toString(rs.getRow());
            for (Cell cell : rs.listCells()) {
                String cf = Bytes.toString(CellUtil.cloneFamily(cell));
                String qua = Bytes.toString(CellUtil.cloneQualifier(cell));
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                System.out.println(rk + "," + cf + "," + qua + "," + value);
            }
        }
    }

    /** Scans with the filter, prints the rows in fixed-column form and always closes the scanner. */
    private void scanAndPrintRows(Filter filter) throws IOException {
        try (ResultScanner scanner = getScanner(filter)) {
            printScanner(scanner);
        }
    }

    /** Scans with the filter, prints the result cell by cell and always closes the scanner. */
    private void scanAndPrintCells(Filter filter) throws IOException {
        try (ResultScanner scanner = getScanner(filter)) {
            printScannerWithCellUnit(scanner);
        }
    }

    /**
     * Opens the HBase connection and the handles used by the tests.
     *
     * @throws IOException if the connection, admin or table handle cannot be obtained
     */
    @Before
    public void init() throws IOException {
        // Create the HBase configuration.
        Configuration conf = HBaseConfiguration.create();
        // Address of the ZooKeeper quorum the HBase cluster belongs to.
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");
        // Establish the connection.
        conn = ConnectionFactory.createConnection(conf);
        // Admin handle for table-level operations that do not touch table data.
        admin = conn.getAdmin();
        stuName = TableName.valueOf("stu");
        stu = conn.getTable(stuName);
    }

    /**
     * Row-key filter: prints every row whose key sorts before {@code 1500100010}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void RowFilterWithBinaryComparator() throws IOException {
        // An ordering comparison (LESS) is involved, so a binary comparator is required.
        BinaryComparator binaryComparator = new BinaryComparator(Bytes.toBytes("1500100010"));
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.LESS, binaryComparator);
        scanAndPrintRows(rowFilter);
    }

    /**
     * Column-family filter: prints all cells of every family whose name contains {@code "f"}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void FamilyFilterWithSubstringComparator() throws IOException {
        SubstringComparator substringComparator = new SubstringComparator("f");
        FamilyFilter familyFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, substringComparator);
        scanAndPrintCells(familyFilter);
    }

    /**
     * Qualifier filter: prints all cells whose column name matches the regular expression
     * {@code .*[mld].*}, i.e. contains {@code m}, {@code l} or {@code d}.
     *
     * <p>Despite the method name, the comparator used here is a {@code RegexStringComparator}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void QualifierFilterWithSubstringComparator() throws IOException {
        RegexStringComparator regexStringComparator = new RegexStringComparator(".*[mld].*");
        QualifierFilter qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, regexStringComparator);
        scanAndPrintCells(qualifierFilter);
    }

    /**
     * Value filter: prints every cell whose value starts with "张".
     *
     * <p>The filter is not tied to a particular column; it is applied to the value of every cell.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void ValueFilterWithBinaryPrefixComparator() throws IOException {
        BinaryPrefixComparator binaryPrefixComparator = new BinaryPrefixComparator(Bytes.toBytes("张"));
        ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, binaryPrefixComparator);
        scanAndPrintCells(valueFilter);
    }

    /**
     * Single-column value filter: prints the complete row of every student whose
     * {@code info:clazz} value starts with "文科".
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueFilter() throws IOException {
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                INFO,
                CLAZZ,
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        scanAndPrintRows(singleColumnValueFilter);
    }

    /**
     * Single-column value exclude filter: same selection as {@link #SingleColumnValueFilter()},
     * but the compared column ({@code info:clazz}) is left out of the returned rows, so it is
     * printed as {@code null}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueExcludeFilter() throws IOException {
        SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter(
                INFO,
                CLAZZ,
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        scanAndPrintRows(singleColumnValueExcludeFilter);
    }

    /**
     * Row-key prefix filter: prints every row whose key starts with {@code 150010008}.
     *
     * <p>{@code PrefixFilter} is equivalent to a {@code RowFilter} combined with a
     * {@code BinaryPrefixComparator}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void PrefixFilter() throws IOException {
        PrefixFilter prefixFilter = new PrefixFilter(Bytes.toBytes("150010008"));
        scanAndPrintRows(prefixFilter);
    }

    /**
     * Paging with {@code PageFilter}: prints page 3 of the table using a page size of 10.
     *
     * <p>A first scan walks the table up to the first row of the requested page; a second scan
     * then starts at that row and is limited to one page. Nothing is printed when the table holds
     * fewer rows than the page offset.
     *
     * @throws IOException if either scan fails
     */
    @Test
    public void PageFilter() throws IOException {
        int pageNum = 3;
        int pageSize = 10;
        // 1-based position of the first row that belongs to the requested page.
        int firstRowIndex = (pageNum - 1) * pageSize + 1;

        Scan locateScan = new Scan();
        locateScan.setFilter(new PageFilter(firstRowIndex));
        byte[] pageStartRow = null;
        try (ResultScanner rss = stu.getScanner(locateScan)) {
            int seen = 0;
            for (Result rs : rss) {
                seen++;
                // Count on the client as well: PageFilter is evaluated independently per region,
                // so a multi-region table can yield more rows than the requested page size.
                if (seen == firstRowIndex) {
                    // Keep the key as raw bytes; no String round trip, no charset dependence.
                    pageStartRow = rs.getRow();
                    break;
                }
            }
        }
        if (pageStartRow == null) {
            // The table ends before the requested page starts.
            return;
        }

        Scan pageScan = new Scan();
        pageScan.withStartRow(pageStartRow);
        // A scan limit caps the total number of rows returned for the page.
        pageScan.setLimit(pageSize);
        try (ResultScanner rss = stu.getScanner(pageScan)) {
            printScanner(rss);
        }
    }

    /**
     * Paging by row-key arithmetic: prints page 7 (page size 10) by computing the start and stop
     * row keys directly.
     *
     * <p>{@code PageFilter} is inefficient because the preceding rows have to be traversed, so
     * paging is usually built on the row-key design instead. This assumes row keys are
     * consecutive numbers above {@code 1500100000} -- TODO confirm against the table's data.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void pageWithRk() throws IOException {
        int rkBase = 1500100000;
        int pageNum = 7;
        int pageSize = 10;
        // First key of the page (inclusive) and first key of the next page (exclusive stop row).
        String startRow = String.valueOf(rkBase + (pageNum - 1) * pageSize + 1);
        String endRow = String.valueOf(rkBase + pageNum * pageSize + 1);
        Scan scan = new Scan();
        scan.withStartRow(Bytes.toBytes(startRow));
        scan.withStopRow(Bytes.toBytes(endRow));
        try (ResultScanner rss = stu.getScanner(scan)) {
            printScanner(rss);
        }
    }

    /**
     * Multi-condition query: prints students whose {@code info:clazz} starts with "文科一班",
     * whose {@code info:gender} starts with "男" and whose row key contains {@code 8}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void FilterList() throws IOException {
        SingleColumnValueFilter clazzFilter = new SingleColumnValueFilter(
                INFO,
                CLAZZ,
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科一班")));
        SingleColumnValueFilter genderFilter = new SingleColumnValueFilter(
                INFO,
                GENDER,
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("男")));
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("8"));

        // Operator.MUST_PASS_ALL combines the filters with AND (the default);
        // Operator.MUST_PASS_ONE would combine them with OR.
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        filterList.addFilter(clazzFilter);
        filterList.addFilter(genderFilter);
        filterList.addFilter(rowFilter);
        scanAndPrintRows(filterList);
    }

    /**
     * Releases the table, admin and connection handles.
     *
     * <p>Each handle is null-checked because JUnit still runs this method when {@link #init()}
     * failed part-way, and the nested {@code finally} blocks make sure a failure while closing
     * one handle does not prevent the remaining ones from being closed.
     *
     * @throws IOException if closing any handle fails
     */
    @After
    public void close() throws IOException {
        try {
            if (stu != null) {
                stu.close();
            }
        } finally {
            try {
                if (admin != null) {
                    admin.close();
                }
            } finally {
                if (conn != null) {
                    conn.close();
                }
            }
        }
    }
}