Hbase过滤器

花与少年°

已于 2022-04-21 17:12:42 修改

阅读量148

点赞数

分类专栏： java-spark hadoop-hbase 文章标签： hbase

于 2020-11-09 14:09:08 首次发布

本文链接：https://blog.csdn.net/weixin_37630333/article/details/109576120

版权

java-spark 同时被 2 个专栏收录

10 篇文章 0 订阅

订阅专栏

hadoop-hbase

4 篇文章 0 订阅

订阅专栏

过滤器：https://blog.csdn.net/m0_37809146/article/details/91128097

1. 单列列值过滤器

代码含义：查询TCL列族的flag列，值包含"N"的数据（SubstringComparator 是不区分大小写的子串匹配；如需精确等于"N"，请改用 BinaryComparator）

// Select rows whose TCL:flag value contains "N". SubstringComparator is a
// case-insensitive substring match; for an exact match use
// new BinaryComparator(Bytes.toBytes("N")) instead.
SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
		// Bytes.toBytes always encodes as UTF-8; String.getBytes() would use the platform default charset.
		Bytes.toBytes("TCL"),
		Bytes.toBytes("flag"),
		CompareOperator.EQUAL,
		new SubstringComparator("N"));
// false (the default): rows that have no flag column are still returned;
// true: rows that have no flag column are dropped.
singleColumnValueFilter.setFilterIfMissing(false);
scan.setFilter(singleColumnValueFilter);

SubstringComparator：包含某值
详细参考：https://www.cnblogs.com/zpb2016/p/12775374.html

2. 列名前缀过滤器

// Column-name prefix filter: keeps the columns whose qualifier starts with "age";
// rows that have such a column show up in the scan result.
final byte[] qualifierPrefix = Bytes.toBytes("age");
scan.setFilter(new ColumnPrefixFilter(qualifierPrefix));
//过滤完结果：为前四个

图片数据
3. 行键前缀过滤器

// Row-key prefix filter: only rows whose row key starts with "1" pass.
final byte[] rowKeyPrefix = Bytes.toBytes("1");
scan.setFilter(new PrefixFilter(rowKeyPrefix));

//参考 2.图片数据  ，返回结果为：第一行

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Map.Entry;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.util.Bytes;

import com.fahaicc.zeus.util.HBaseUtil;

/**
 * Small command-line demo of HBase scan filters against the {@code TC:TEST_CASE} table.
 *
 * <p>The demo data (written by {@link #saveHbaseData()}) consists of four rows, all in the
 * {@code ETL} column family:
 * <ol>
 *   <li>name: 王思聪, address: 上海, age: 35</li>
 *   <li>name: 周杰伦, address: 台北, age: 43</li>
 *   <li>english_name: LeBron James, address: 洛杉矶</li>
 *   <li>name: 莎拉波娃, gender: 女, english_name: shalabowa</li>
 * </ol>
 */
public class HbaseFilterTest {

	/** Name of the HBase table the demo reads from and writes to. */
	static String hbaseName = "TC:TEST_CASE";

	/** Column family holding every demo column (encoded as UTF-8 by {@code Bytes.toBytes}). */
	private static final byte[] FAMILY = Bytes.toBytes("ETL");

	/**
	 * Entry point: runs the filtered scan and prints every matching row.
	 *
	 * @param args unused
	 * @throws Exception if the table cannot be opened or scanned
	 */
	public static void main(String[] args) throws Exception {
		// Call saveHbaseData() once beforehand to seed the table with the four demo rows.
		doWork();
	}

	/**
	 * Builds a scan with the selected demo filter, runs it, and prints each returned row as
	 * {@code key__value  /  } pairs followed by a {@code =========} separator line.
	 *
	 * @throws Exception if the table cannot be opened or scanned
	 */
	private static void doWork() throws Exception {
		// Table and ResultScanner both hold client/server resources, so close them deterministically.
		try (Table table = HBaseUtil.getTable(hbaseName)) {
			Scan scan = new Scan();
			FilterList filterList = new FilterList(Operator.MUST_PASS_ALL);

			// Demo 1: single-column value filter on ETL:age.
			testOne(scan, filterList);
			// Demo 2 (swap in instead of testOne): column-name prefix filter for "add".
			// testTwo(scan, filterList);

			try (ResultScanner resultScanner = table.getScanner(scan)) {
				for (Result result : resultScanner) {
					Map<String, String> row = convertToMap(result);
					row.forEach((key, value) -> System.out.print(key + "__" + value + "  /  "));
					System.out.println("=========");
				}
			}
		}
	}

	/**
	 * Column-name prefix filter demo: keeps only the columns whose qualifier starts with
	 * {@code add}.
	 *
	 * <p>Output observed with the demo data:
	 * <pre>
	 * ETL:address__上海  /  rowKey__id_1_wsc
	 * ETL:address__台北  /  rowKey__id_2_zjl
	 * ETL:address__洛杉矶  /  rowKey__id_3_james
	 * </pre>
	 *
	 * @param scan       scan that receives the filter list
	 * @param filterList filter list the new filter is appended to
	 */
	private static void testTwo(Scan scan, FilterList filterList) {
		filterList.addFilter(new ColumnPrefixFilter(Bytes.toBytes("add")));
		scan.setFilter(filterList);
	}

	/**
	 * Single-column value filter demo: matches rows whose {@code ETL:age} value contains the
	 * text {@code null} ({@code SubstringComparator} is a substring match, not an equality test).
	 *
	 * <p>Because {@code filterIfMissing} is left at {@code false}, rows that have no
	 * {@code age} column at all are returned as well. With the demo data, rows 1 and 2 have
	 * ages that do not contain "null" and are dropped, while rows 3 and 4 have no
	 * {@code age} column and are returned:
	 * <pre>
	 * 3. english_name: LeBron James, address: 洛杉矶
	 * 4. name: 莎拉波娃, gender: 女, english_name: shalabowa
	 * </pre>
	 *
	 * @param scan       scan that receives the filter list
	 * @param filterList filter list the new filter is appended to
	 */
	private static void testOne(Scan scan, FilterList filterList) {
		// Bytes.toBytes (UTF-8) instead of String.getBytes(), which depends on the platform charset.
		SingleColumnValueFilter ageFilter = new SingleColumnValueFilter(
				FAMILY, Bytes.toBytes("age"), CompareOperator.EQUAL, new SubstringComparator("null"));
		// false is the default: a row without the age column passes the filter.
		ageFilter.setFilterIfMissing(false);

		filterList.addFilter(ageFilter);
		scan.setFilter(filterList);
	}

	/**
	 * Writes the four demo rows into the table named by {@link #hbaseName}.
	 *
	 * @throws Exception if the write fails
	 */
	private static void saveHbaseData() throws Exception {
		Put put1 = new Put(Bytes.toBytes("id_1_wsc"));
		put1.addColumn(FAMILY, Bytes.toBytes("name"), Bytes.toBytes("王思聪"));
		put1.addColumn(FAMILY, Bytes.toBytes("address"), Bytes.toBytes("上海"));
		put1.addColumn(FAMILY, Bytes.toBytes("age"), Bytes.toBytes("35"));

		Put put2 = new Put(Bytes.toBytes("id_2_zjl"));
		put2.addColumn(FAMILY, Bytes.toBytes("name"), Bytes.toBytes("周杰伦"));
		put2.addColumn(FAMILY, Bytes.toBytes("address"), Bytes.toBytes("台北"));
		put2.addColumn(FAMILY, Bytes.toBytes("age"), Bytes.toBytes("43"));

		Put put3 = new Put(Bytes.toBytes("id_3_james"));
		put3.addColumn(FAMILY, Bytes.toBytes("english_name"), Bytes.toBytes("LeBron James"));
		put3.addColumn(FAMILY, Bytes.toBytes("address"), Bytes.toBytes("洛杉矶"));

		Put put4 = new Put(Bytes.toBytes("id_4_slbw"));
		put4.addColumn(FAMILY, Bytes.toBytes("name"), Bytes.toBytes("莎拉波娃"));
		put4.addColumn(FAMILY, Bytes.toBytes("gender"), Bytes.toBytes("女"));
		put4.addColumn(FAMILY, Bytes.toBytes("english_name"), Bytes.toBytes("shalabowa"));

		List<Put> puts = Arrays.asList(put1, put2, put3, put4);
		HBaseUtil.saveData(hbaseName, puts);
	}

	/**
	 * Flattens a scan result into a map of {@code family:qualifier -> value} strings, plus a
	 * {@code rowKey} entry holding the row key.
	 *
	 * @param result one row returned by a scan or get
	 * @return the flattened row; empty when the result carries no cells
	 */
	public static Map<String, String> convertToMap(Result result) {
		Map<String, String> map = new HashMap<>();
		NavigableMap<byte[], NavigableMap<byte[], byte[]>> noVersionMap = result.getNoVersionMap();
		if (noVersionMap == null) {
			return map;
		}

		map.put("rowKey", Bytes.toString(result.getRow()));
		for (Entry<byte[], NavigableMap<byte[], byte[]>> familyEntry : noVersionMap.entrySet()) {
			String family = Bytes.toString(familyEntry.getKey());
			// Skip cells whose family name is empty.
			if (family.isEmpty()) {
				continue;
			}
			for (Entry<byte[], byte[]> cell : familyEntry.getValue().entrySet()) {
				map.put(family + ":" + Bytes.toString(cell.getKey()), Bytes.toString(cell.getValue()));
			}
		}
		return map;
	}
}

花与少年°

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Hbase过滤器

单列列值过滤器代码含义：查询TCL列族，flag列字段，值为N的数据SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter("TCL".getBytes(),"flag".getBytes(), CompareOperator.EQUAL, new SubstringComparator("N"));//Bytes.toBytes("N")singleColumnValueFilter.set.
复制链接

扫一扫

专栏目录