假设:
Rowkey设计为:
rowkey=分区号(userId的最后一位+补充0,组成两位数)-注册时间的时间戳(13位)-u+userId(按千万量计算,加前缀u,共6位)
现在要查询出时间段:1564647622838 到1564647622938 这个时间段的数据
#方案一:采用过滤器 无法单纯通过这个时间段查询数据
#方案二:设置startRow,endRow
startRow="70-1564647622838-u000000",endRow="80-1564647622938-u1000000";
不太满足要求,80分区中时间段小于1564647622838数据也能查询出来
#方案三:分区查询再累加
70区域这个时间段的数据:
startRow="70-1564647622838-u000000",endRow="70-1564647622938-u1000000";
80区域这个时间段的数据:
startRow="80-1564647622838-u000000",endRow="80-1564647622938-u1000000";
最后这个两个区域70,80都是这个时间段的结果累加起来。
下面是前面两篇博文的完整代码:
package com.meboth.hive.connection.hbase.createPartition;
import com.meboth.hive.connection.hbase.hbase.HbaseUtil;
import com.meboth.hive.connection.hbase.utils.DateUtil;
import com.meboth.hive.connection.hbase.utils.HbaseConnectionUtils;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.*;
/**
* @className CreateHbasePartitionTable
* @Description TODO
* @Author liujianfu
* @Date 2019/7/13 16:40
* @Version 1.0
**/
public class CreateHbasePartitionTable {
private final static Logger log= Logger.getLogger(String.valueOf(CreateHbasePartitionTable.class));
public static void main(String args[])throws IOException{
//createIndexTsble();//索引表
// createInfoTable();//信息表
/*************查询 *******************/
/*
String startRow="70-u000";
String endRow="90-u100";
String tableNameIndex= "hb_yonghu_index";
Table tableIndex=HbaseConnectionUtils.getInstance().getTable(tableNameIndex);
scanByRangeQuery(tableIndex,startRow,endRow);//范围查询
*/
/**
scanByRegexFilter(tableIndex, "u073"); //正则查询
**/
/**
String rowKey="90-u100";
scanByRowKeyFilter(tableIndex,rowKey);//rowkey查询
**/
Table table=HbaseConnectionUtils.getInstance().getTable("hb_yonghu");
String beginStr="70-1564647622838-u000000";
String endStr="80-1564647622938-u1000000";
// scanByPrefixQuery(table,"70");
//scanByContainsFilter(table,"","1564647622836");//时间查询
scanByRangeTimeQuery(table,beginStr,endStr);//时间范围查询;80-0000000-XXX到80-1564647622938的数据也能查询到
// 80分区比70分区大,即80分区的所有数据均能查询出来。
//要求要查询:时间段:1564647622838 到1564647622938 这个时间段的数据
// startRow="70-1564647622838-u000000",endRow="80-1564647622938-u1000000";不太满足要求,80分区中时间段小于1564647622838数据也能查询出来
// 解决办法:分区查询再累加;70区域这个时间段的数据: startRow="70-1564647622838-u000000",endRow="70-1564647622938-u1000000";
// 80区域这个时间段的数据: startRow="80-1564647622838-u000000",endRow="80-1564647622938-u1000000";
// 最后将结果累加起来
}
/**
* 创建索引表
*/
public static void createIndexTsble(){
String tableNameIndex= "hb_yonghu_index";
String familyArray[]={"index-rw"};
initUserTable(tableNameIndex, familyArray,true);
Table tableIndex=HbaseConnectionUtils.getInstance().getTable( tableNameIndex);
batchPutIndex(tableIndex);//索引表
}
/**
* 创建信息表
*/
public static void createInfoTable(){
String tableName= "hb_yonghu";
String familyArray[]={"index-rw"};
initUserTable(tableName, familyArray,true);
Table table=HbaseConnectionUtils.getInstance().getTable( tableName);
batchPut(table);//信息表
}
public static void initUserTable(String tableName,String familyArray[],boolean partionFlag){
List<String> list=new ArrayList<String>();
try {
Admin hadmin = HbaseConnectionUtils.getInstance().getConnection().getAdmin();
TableName tm = TableName.valueOf(tableName);
if (!hadmin.tableExists(TableName.valueOf(tableName))) {
HTableDescriptor hTableDescriptor = new HTableDescriptor(tm);
for(String colFa:familyArray){
HColumnDescriptor family = new HColumnDescriptor(colFa);
family.setMaxVersions(1);
hTableDescriptor.addFamily(family);
}
if(partionFlag){
hadmin.createTable(hTableDescriptor, getSplitKeys());
}
else {
hadmin.createTable(hTableDescriptor);//不分区
}
hadmin.close();
}
else {
log.info("................新建表:"+tableName+"已存在..........................");
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
log.info("................................................create hbase table "+tableName+" successful..........");
}
public static byte[][] getSplitKeys() {
String[] keys = new String[] {"10|", "20|", "30|", "40|", "50|", "60|", "70|", "80|", "90|" };
byte[][] splitKeys = new byte[keys.length][];
TreeSet<byte[]> rows = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);//升序排序
for (int i = 0; i < keys.length; i++) {
rows.add(Bytes.toBytes(keys[i]));
}
Iterator<byte[]> rowKeyIter = rows.iterator();
int i=0;
while (rowKeyIter.hasNext()) {
byte[] tempRow = rowKeyIter.next();
rowKeyIter.remove();
splitKeys[i] = tempRow;
i++;
}
return splitKeys;
}
/**
提前创建预分区。划分10个区域,预计100数据量,每个分区10
* *用户id小于10的都划分在“00”
* *用户id大于10且小于20的都划分在“10区域”
* 依次类推.....
* rowkey的策略:
* 分区编号(根据用户id所在划分的分区编号获得,见上面解释)+"-u"+u.getUserId();
*/
public static void batchPutIndex(Table hTable) {
List<Put> list = new ArrayList<Put>();
for (int i = 1; i <= 100; i++) {
String regionNo="00";
regionNo= getRegionNo(i,regionNo);
String k=i+"";
if(k.length()<3){
StringBuffer sb=new StringBuffer();
for(int m=0;m<3-k.length();m++){
sb.append("0");
}
k=sb.toString()+k;
}
byte[] rowkey = Bytes.toBytes(regionNo +"-u"+k);
Put put = new Put(rowkey);
put.addColumn("index-rw".getBytes(), "info".getBytes(), Bytes.toBytes("zs" + i));
list.add(put);
}
try {
hTable.put(list);
} catch (IOException e) {
e.printStackTrace();
}
list.clear();
log.info("添加数据成功..........................");
}
public static String getRegionNo(int i,String regionNo){
if(i<10){
regionNo="00";
}
else if(i>=10&&i<20){
regionNo="10";
}
else if(i>=20&&i<30){
regionNo="20";
}
else if(i>=30&&i<40){
regionNo="30";
}
else if(i>=50&&i<60){
regionNo="40";
}
else if(i>=50&&i<60){
regionNo="50";
}
else if(i>=60&&i<70){
regionNo="60";
}
else if(i>=70&&i<80){
regionNo="70";
}
else if(i>=80&&i<90){
regionNo="80";
}
else{
regionNo="90";
}
return regionNo;
}
/**
* #创建表,提前创建预分区。划分10个区域
* 用户id末位为0的都划分在“10|”
*用户id末位为1的都划分在“10|-20|”
*用户id末位为2的都划分在“20|-30|”
*
* #设计rowkey
* rowkey=分区号(userId的最后一位+补充0,组成两位数)-注册时间的时间戳(13位)-u+userId(按千万量计算,加前缀u,共9位)
* rowkey的长度=2+1+13+1+9=26位
* @param hTable
*/
public static void batchPut(Table hTable) {
List<Put> list = new ArrayList<Put>();
for (int i = 1; i <= 10000; i++) {
String k=i+"";
if(k.length()<6){
StringBuffer sb=new StringBuffer();
for(int m=0;m<6-k.length();m++){
sb.append("0");
}
k=sb.toString()+k;
}
String n=k.substring(k.length()-1);
n=n+"0";//分区号(userId的最后一位+补充0,组成两位数)
byte[] rowkey = Bytes.toBytes(n + "-" +System.currentTimeMillis()+"-u"+k);//分区号(userId的最后一位+补充0,组成两位数)
Put put = new Put(rowkey);
put.addColumn("index-rw".getBytes(), "info".getBytes(), Bytes.toBytes("zs" + i));
list.add(put);
}
try {
hTable.put(list);
} catch (IOException e) {
e.printStackTrace();
}
list.clear();
log.info("添加数据成功..........................");
}
private static String getRandomNumber(){
String ranStr = Math.random()+"";
int pointIndex = ranStr.indexOf(".");
return ranStr.substring(pointIndex+1, pointIndex+3);
}
/**
* scan 设置查询条件的模糊查询
* @param hTable
* @throws IOException
*/
private static void scanRecordByParam(Table hTable,String startRow) throws IOException {
Scan scan = new Scan();
//scan.addColumn(Bytes.toBytes("cf"),Bytes.toBytes("name"));
scan.setStartRow(Bytes.toBytes("90-000000-1563011905980"));
scan.setStopRow(Bytes.toBytes("90-009939-1563011905981"));
Filter filter2 = new RowFilter(CompareFilter.CompareOp.GREATER,new RegexStringComparator(".*-1563011905970"));//90-009979
Filter filter3 = new RowFilter(CompareFilter.CompareOp.LESS,new RegexStringComparator(".*-1563011905981"));//90-009979
// Filter mFilter = new PrefixFilter(Bytes.toBytes("90-009829"));//通过用户id查询
Filter isf=new InclusiveStopFilter(Bytes.toBytes("80-009839-1563011905979"));//OK包含了扫描的上限在结果之内
FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
filterList.addFilter(filter2);
filterList.addFilter(filter3);
List<Long> list = new ArrayList<Long>();
list.add(1563011905970L);
list.add(1563011905981L);
TimestampsFilter filter = new TimestampsFilter(list);
// scan.setFilter(filterList);
ResultScanner scanner = hTable.getScanner(scan);
for (Result result : scanner) {
System.out.println("rowKey:"+new String(result.getRow()));
for (Cell cell : result.rawCells()) {
// System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
}
}
}
/**
* scan 设置时间范围查询
* @param hTable
* @throws IOException
*/
private static void scanByRangeQuery(Table hTable,String startRow,String endRow) throws IOException {
Scan scan = new Scan();
scan.setStartRow(Bytes.toBytes(startRow));//范围查询
scan.setStopRow(Bytes.toBytes(endRow));
ResultScanner scanner = hTable.getScanner(scan);
for (Result result : scanner) {
System.out.println("rowKey:"+new String(result.getRow()));
for (Cell cell : result.rawCells()) {
// System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
}
}
}
/**
* scan 设置时间范围查询
* @param hTable
* @throws IOException
*/
private static void scanByRangeTimeQuery(Table hTable,String startRow,String endRow) throws IOException {
Scan scan = new Scan();
scan.setStartRow(Bytes.toBytes(startRow));//范围查询
scan.setStopRow(Bytes.toBytes(endRow));
ResultScanner scanner = hTable.getScanner(scan);
for (Result result : scanner) {
System.out.println("rowKey:"+new String(result.getRow()));
for (Cell cell : result.rawCells()) {
// System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
}
}
}
/**
* scan 前缀过滤器
* @param hTable
* @throws IOException
*/
private static void scanByPrefixQuery(Table hTable,String prefix) throws IOException {
Scan scan = new Scan();
Filter filter = new PrefixFilter(Bytes.toBytes(prefix));
scan.setFilter(filter);
ResultScanner scanner = hTable.getScanner(scan);
for (Result result : scanner) {
System.out.println("rowKey:"+new String(result.getRow()));
for (Cell cell : result.rawCells()) {
// System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
}
}
}
/**
* scan 正则过滤器,根据用户id查询用户
* 提取rowkey以01结尾数据
* Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new RegexStringComparator(".*01$"));
* 提取rowkey以包含201407的数据
* Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new SubstringComparator("201407"));
* 提取rowkey以123开头的数据
* Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new BinaryPrefixComparator("123".getBytes()));
* @param hTable
* @throws IOException
*/
private static void scanByRegexFilter(Table hTable,String userId) throws IOException {
Scan scan = new Scan();
Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new RegexStringComparator(".*-"+userId));//查询uid
scan.setFilter(filter);
ResultScanner scanner = hTable.getScanner(scan);
for (Result result : scanner) {
System.out.println("rowKey:"+new String(result.getRow()));
for (Cell cell : result.rawCells()) {
// System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
}
}
}
/**
* scan 包含过滤器
* @param hTable
* @throws IOException
*/
private static void scanByContainsFilter(Table hTable,String startRow,String param) throws IOException {
Scan scan = new Scan();
Filter filter = new RowFilter(CompareFilter.CompareOp.NO_OP,new SubstringComparator(param));
scan.setFilter(filter);
ResultScanner scanner = hTable.getScanner(scan);
for (Result result : scanner) {
System.out.println("rowKey:"+new String(result.getRow()));
for (Cell cell : result.rawCells()) {
// System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
}
}
}
/**
* scan 通过rowkey过滤器进行查看
* @param hTable
* @throws IOException
*/
private static void scanByRowKeyFilter(Table hTable,String rowKey) throws IOException {
Scan scan = new Scan();
Filter filter=new RowFilter(CompareFilter.CompareOp.GREATER_OR_EQUAL,new BinaryComparator(Bytes.toBytes(rowKey)));//OK筛选出匹配的所有的行
scan.setFilter(filter);
ResultScanner scanner = hTable.getScanner(scan);
for (Result result : scanner) {
System.out.println("rowKey:"+new String(result.getRow()));
for (Cell cell : result.rawCells()) {
// System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
}
}
}
}