Hbase 的弊端按时间段范围查询（三）

最新推荐文章于 2024-08-27 00:42:19 发布
健康平安的活着
最新推荐文章于 2024-08-27 00:42:19 发布
阅读量7.8k
点赞数 3
分类专栏：大数据 hbase
本文链接：https://blog.csdn.net/u011066470/article/details/98180569
版权
大数据同时被 2 个专栏收录
73 篇文章 2 订阅
订阅专栏
hbase
10 篇文章 0 订阅
订阅专栏
假设：
Rowkey设计为：
rowkey=分区号(userId的最后一位+补充0,组成两位数)-注册时间的时间戳(13位)-u+userId(按千万量计算，加前缀u，共6位)
现在要查询出时间段:1564647622838 到1564647622938 这个时间段的数据
#方案一：采用过滤器无法单纯通过这个时间段查询数据
#方案二：设置startRow，endRow
startRow="70-1564647622838-u000000"，endRow="80-1564647622938-u1000000";
不太满足要求，80分区中时间段小于1564647622838数据也能查询出来
#方案三：分区查询再累加
70区域这个时间段的数据：
startRow="70-1564647622838-u000000"，endRow="70-1564647622938-u1000000";
80区域这个时间段的数据：
startRow="80-1564647622838-u000000"，endRow="80-1564647622938-u1000000";
最后这个两个区域70,80都是这个时间段的结果累加起来。
下面是前面两篇博文的完整代码：
package com.meboth.hive.connection.hbase.createPartition;

import com.meboth.hive.connection.hbase.hbase.HbaseUtil;
import com.meboth.hive.connection.hbase.utils.DateUtil;
import com.meboth.hive.connection.hbase.utils.HbaseConnectionUtils;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;

import java.io.IOException;
import java.util.*;

/**
 * @className CreateHbasePartitionTable
 * @Description TODO
 * @Author liujianfu
 * @Date 2019/7/13 16:40
 * @Version 1.0
 **/

public class CreateHbasePartitionTable {
    private  final static Logger log= Logger.getLogger(String.valueOf(CreateHbasePartitionTable.class));
    public static void main(String args[])throws IOException{

       //createIndexTsble();//索引表
       // createInfoTable();//信息表
        /*************查询 *******************/
        /*
        String startRow="70-u000";
        String endRow="90-u100";
        String tableNameIndex=  "hb_yonghu_index";
        Table tableIndex=HbaseConnectionUtils.getInstance().getTable(tableNameIndex);
       scanByRangeQuery(tableIndex,startRow,endRow);//范围查询
       */

        /**
        scanByRegexFilter(tableIndex, "u073"); //正则查询
         **/

        /**
         String rowKey="90-u100";
        scanByRowKeyFilter(tableIndex,rowKey);//rowkey查询
         **/
        Table table=HbaseConnectionUtils.getInstance().getTable("hb_yonghu");
        String beginStr="70-1564647622838-u000000";
        String endStr="80-1564647622938-u1000000";
       // scanByPrefixQuery(table,"70");

       //scanByContainsFilter(table,"","1564647622836");//时间查询

       scanByRangeTimeQuery(table,beginStr,endStr);//时间范围查询;80-0000000-XXX到80-1564647622938的数据也能查询到
       // 80分区比70分区大，即80分区的所有数据均能查询出来。
        //要求要查询：时间段:1564647622838 到1564647622938 这个时间段的数据
       //  startRow="70-1564647622838-u000000"，endRow="80-1564647622938-u1000000";不太满足要求，80分区中时间段小于1564647622838数据也能查询出来
       // 解决办法：分区查询再累加；70区域这个时间段的数据：  startRow="70-1564647622838-u000000"，endRow="70-1564647622938-u1000000";
        //                         80区域这个时间段的数据：  startRow="80-1564647622838-u000000"，endRow="80-1564647622938-u1000000";
        //                最后将结果累加起来



    }

    /**
     * 创建索引表
     */
    public static void createIndexTsble(){
        String tableNameIndex=  "hb_yonghu_index";
        String familyArray[]={"index-rw"};
        initUserTable(tableNameIndex, familyArray,true);
        Table tableIndex=HbaseConnectionUtils.getInstance().getTable( tableNameIndex);
        batchPutIndex(tableIndex);//索引表
    }

    /**
     * 创建信息表
     */
    public static void createInfoTable(){
        String tableName=  "hb_yonghu";
        String familyArray[]={"index-rw"};
        initUserTable(tableName, familyArray,true);
        Table table=HbaseConnectionUtils.getInstance().getTable( tableName);
        batchPut(table);//信息表
    }
    public static void initUserTable(String tableName,String familyArray[],boolean partionFlag){
        List<String> list=new ArrayList<String>();
        try {
            Admin hadmin = HbaseConnectionUtils.getInstance().getConnection().getAdmin();
            TableName tm = TableName.valueOf(tableName);
            if (!hadmin.tableExists(TableName.valueOf(tableName))) {
                HTableDescriptor hTableDescriptor = new HTableDescriptor(tm);
                for(String colFa:familyArray){
                    HColumnDescriptor family = new HColumnDescriptor(colFa);
                    family.setMaxVersions(1);
                    hTableDescriptor.addFamily(family);
                }
                if(partionFlag){
                    hadmin.createTable(hTableDescriptor, getSplitKeys());
                }
                else {
                    hadmin.createTable(hTableDescriptor);//不分区
                }
                hadmin.close();
            }
            else {
                log.info("................新建表:"+tableName+"已存在..........................");
            }
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        log.info("................................................create hbase table "+tableName+" successful..........");
    }
    public static  byte[][] getSplitKeys() {
        String[] keys = new String[] {"10|", "20|", "30|", "40|", "50|", "60|", "70|", "80|", "90|" };
        byte[][] splitKeys = new byte[keys.length][];
        TreeSet<byte[]> rows = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);//升序排序
        for (int i = 0; i < keys.length; i++) {
            rows.add(Bytes.toBytes(keys[i]));
        }
        Iterator<byte[]> rowKeyIter = rows.iterator();
        int i=0;
        while (rowKeyIter.hasNext()) {
            byte[] tempRow = rowKeyIter.next();
            rowKeyIter.remove();
            splitKeys[i] = tempRow;
            i++;
        }
        return splitKeys;
    }
    /**
     提前创建预分区。划分10个区域，预计100数据量，每个分区10
     *      *用户id小于10的都划分在“00”
     *      *用户id大于10且小于20的都划分在“10区域”
     *     依次类推.....
     *     rowkey的策略:
     *     分区编号(根据用户id所在划分的分区编号获得，见上面解释)+"-u"+u.getUserId();
     */
    public  static void batchPutIndex(Table hTable) {
        List<Put> list = new ArrayList<Put>();
        for (int i = 1; i <= 100; i++) {
            String regionNo="00";
           regionNo= getRegionNo(i,regionNo);
            String k=i+"";
            if(k.length()<3){
                StringBuffer sb=new StringBuffer();
                for(int m=0;m<3-k.length();m++){
                    sb.append("0");
                }
                k=sb.toString()+k;
            }
            byte[] rowkey = Bytes.toBytes(regionNo +"-u"+k);
            Put put = new Put(rowkey);
            put.addColumn("index-rw".getBytes(), "info".getBytes(), Bytes.toBytes("zs" + i));
            list.add(put);
        }
        try {
            hTable.put(list);
        } catch (IOException e) {
            e.printStackTrace();
        }
        list.clear();
        log.info("添加数据成功..........................");
    }
    public static String getRegionNo(int i,String regionNo){
        if(i<10){
            regionNo="00";
        }
        else if(i>=10&&i<20){
            regionNo="10";
        }
        else if(i>=20&&i<30){
            regionNo="20";
        }
        else if(i>=30&&i<40){
            regionNo="30";
        }
        else if(i>=50&&i<60){
            regionNo="40";
        }
        else if(i>=50&&i<60){
            regionNo="50";
        }
        else if(i>=60&&i<70){
            regionNo="60";
        }
        else if(i>=70&&i<80){
            regionNo="70";
        }
        else if(i>=80&&i<90){
            regionNo="80";
        }
        else{
            regionNo="90";
        }
        return regionNo;
    }
    /**
     * #创建表，提前创建预分区。划分10个区域
     * 用户id末位为0的都划分在“10|”
     *用户id末位为1的都划分在“10|-20|”
     *用户id末位为2的都划分在“20|-30|”
     *
     * #设计rowkey
     * rowkey=分区号(userId的最后一位+补充0,组成两位数)-注册时间的时间戳(13位)-u+userId(按千万量计算，加前缀u，共9位)
     * rowkey的长度=2+1+13+1+9=26位
     * @param hTable
     */
    public  static void batchPut(Table hTable) {
        List<Put> list = new ArrayList<Put>();
        for (int i = 1; i <= 10000; i++) {
              String k=i+"";
              if(k.length()<6){
                  StringBuffer sb=new StringBuffer();
                  for(int m=0;m<6-k.length();m++){
                      sb.append("0");
                  }
                  k=sb.toString()+k;
              }
              String n=k.substring(k.length()-1);
              n=n+"0";//分区号(userId的最后一位+补充0,组成两位数)
            byte[] rowkey = Bytes.toBytes(n +  "-" +System.currentTimeMillis()+"-u"+k);//分区号(userId的最后一位+补充0,组成两位数)
            Put put = new Put(rowkey);
            put.addColumn("index-rw".getBytes(), "info".getBytes(), Bytes.toBytes("zs" + i));
            list.add(put);
        }
        try {
            hTable.put(list);
        } catch (IOException e) {
            e.printStackTrace();
        }
        list.clear();
        log.info("添加数据成功..........................");

    }
    private static String getRandomNumber(){

        String ranStr = Math.random()+"";

        int pointIndex = ranStr.indexOf(".");

        return ranStr.substring(pointIndex+1, pointIndex+3);

    }
    /**
     * scan 设置查询条件的模糊查询
     * @param hTable
     * @throws IOException
     */
    private static void scanRecordByParam(Table hTable,String startRow) throws IOException {
        Scan scan = new Scan();
        //scan.addColumn(Bytes.toBytes("cf"),Bytes.toBytes("name"));
        scan.setStartRow(Bytes.toBytes("90-000000-1563011905980"));
        scan.setStopRow(Bytes.toBytes("90-009939-1563011905981"));
        Filter filter2 = new RowFilter(CompareFilter.CompareOp.GREATER,new RegexStringComparator(".*-1563011905970"));//90-009979
        Filter filter3 = new RowFilter(CompareFilter.CompareOp.LESS,new RegexStringComparator(".*-1563011905981"));//90-009979
       // Filter mFilter = new PrefixFilter(Bytes.toBytes("90-009829"));//通过用户id查询
        Filter isf=new InclusiveStopFilter(Bytes.toBytes("80-009839-1563011905979"));//OK包含了扫描的上限在结果之内
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
       filterList.addFilter(filter2);
        filterList.addFilter(filter3);
        List<Long> list = new ArrayList<Long>();
        list.add(1563011905970L);
        list.add(1563011905981L);
        TimestampsFilter filter = new TimestampsFilter(list);
     //  scan.setFilter(filterList);
        ResultScanner scanner = hTable.getScanner(scan);
        for (Result result : scanner) {
            System.out.println("rowKey:"+new String(result.getRow()));
            for (Cell cell : result.rawCells()) {
              //  System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
            }

        }
    }



    /**
     * scan 设置时间范围查询
     * @param hTable
     * @throws IOException
     */
    private static void scanByRangeQuery(Table hTable,String startRow,String endRow) throws IOException {
        Scan scan = new Scan();
        scan.setStartRow(Bytes.toBytes(startRow));//范围查询
        scan.setStopRow(Bytes.toBytes(endRow));
        ResultScanner scanner = hTable.getScanner(scan);
        for (Result result : scanner) {
            System.out.println("rowKey:"+new String(result.getRow()));
            for (Cell cell : result.rawCells()) {
                //  System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
            }
        }
    }
    /**
     * scan 设置时间范围查询
     * @param hTable
     * @throws IOException
     */
    private static void scanByRangeTimeQuery(Table hTable,String startRow,String endRow) throws IOException {
        Scan scan = new Scan();
        scan.setStartRow(Bytes.toBytes(startRow));//范围查询
        scan.setStopRow(Bytes.toBytes(endRow));
        ResultScanner scanner = hTable.getScanner(scan);
        for (Result result : scanner) {
            System.out.println("rowKey:"+new String(result.getRow()));
            for (Cell cell : result.rawCells()) {
                //  System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
            }
        }
    }
    /**
     * scan 前缀过滤器
     * @param hTable
     * @throws IOException
     */
    private static void scanByPrefixQuery(Table hTable,String prefix) throws IOException {
        Scan scan = new Scan();
        Filter filter = new PrefixFilter(Bytes.toBytes(prefix));
        scan.setFilter(filter);
        ResultScanner scanner = hTable.getScanner(scan);
        for (Result result : scanner) {
            System.out.println("rowKey:"+new String(result.getRow()));
            for (Cell cell : result.rawCells()) {
                //  System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
            }
        }
    }
    /**
     * scan 正则过滤器，根据用户id查询用户
     * 提取rowkey以01结尾数据
     * Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new RegexStringComparator(".*01$"));
     * 提取rowkey以包含201407的数据
     * Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new SubstringComparator("201407"));
     * 提取rowkey以123开头的数据
     * Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new BinaryPrefixComparator("123".getBytes()));
     * @param hTable
     * @throws IOException
     */
    private static void scanByRegexFilter(Table hTable,String userId) throws IOException {
        Scan scan = new Scan();
        Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new RegexStringComparator(".*-"+userId));//查询uid
        scan.setFilter(filter);
        ResultScanner scanner = hTable.getScanner(scan);
        for (Result result : scanner) {
            System.out.println("rowKey:"+new String(result.getRow()));
            for (Cell cell : result.rawCells()) {
                //  System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
            }

        }
    }
    /**
     * scan 包含过滤器
     * @param hTable
     * @throws IOException
     */
    private static void scanByContainsFilter(Table hTable,String startRow,String param) throws IOException {
        Scan scan = new Scan();
        Filter filter = new RowFilter(CompareFilter.CompareOp.NO_OP,new SubstringComparator(param));
        scan.setFilter(filter);
        ResultScanner scanner = hTable.getScanner(scan);
        for (Result result : scanner) {
            System.out.println("rowKey:"+new String(result.getRow()));
            for (Cell cell : result.rawCells()) {
                //  System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
            }

        }
    }
    /**
     * scan 通过rowkey过滤器进行查看
     * @param hTable
     * @throws IOException
     */
    private static void scanByRowKeyFilter(Table hTable,String rowKey) throws IOException {
        Scan scan = new Scan();
        Filter filter=new RowFilter(CompareFilter.CompareOp.GREATER_OR_EQUAL,new BinaryComparator(Bytes.toBytes(rowKey)));//OK筛选出匹配的所有的行
        scan.setFilter(filter);
        ResultScanner scanner = hTable.getScanner(scan);
        for (Result result : scanner) {
            System.out.println("rowKey:"+new String(result.getRow()));
            for (Cell cell : result.rawCells()) {
                //  System.out.println("列族:"+new String(CellUtil.cloneFamily(cell))+ " 列:"+new String(CellUtil.cloneQualifier(cell))+ " 值:"+new String(CellUtil.cloneValue(cell)));
            }

        }
    }



}