HBase java API 使用实例(二)——查询、分页

9 篇文章 0 订阅

HBase java API 使用实例(二)——查询、分页

上一篇HBase文章中已经介绍了如何使用java API操作HBase(http://blog.csdn.net/wild46cat/article/details/53306621),现在介绍一下如何使用java API对HBase进行查询。

本文主要内容:
1、使用java API对HBase进行常用的查询(行、列过滤)。
2、介绍如何进行分页,这里的分页不是最终的解决办法,只是目前自己测试使用时的一种方案(由于数据被split之后会分布在各个Region Server上,如果没有合理的一个RowKey策略的话,分页是不适用的)。
3、最后会附上源码。

好啦,下面上货:
HBaseUtils.java(代码是全的,主要增加的是query查询的一些方法)
package com.xueyoucto.hbasett;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.protobuf.generated.AccessControlProtos;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Static helper methods around the HBase client API: table administration,
 * single-cell writes, row/column deletes and filter-based scans.
 *
 * <p>All methods share the single {@link Connection} that is created when the
 * class is loaded. Every {@link Table}, {@link Admin} and {@link ResultScanner}
 * obtained from it is closed before the method returns.
 */
public class HBaseUtils {
    /** Table used by the test-data helpers. */
    private static final String TEST_TABLE = "test";
    /** First row index (inclusive) written by the test-data helpers. */
    private static final int TEST_ROW_START = 20;
    /** Last row index (exclusive) written by the test-data helpers. */
    private static final int TEST_ROW_END = 12000;

    static Configuration cfg;
    static Connection connection;

    static {
        cfg = HBaseConfiguration.create();
        try {
            connection = ConnectionFactory.createConnection(cfg);
        } catch (IOException e) {
            // Class loading continues; connection() reports the failure on first use.
            e.printStackTrace();
        }
        System.out.println(cfg.get("hbase.master"));
    }

    /**
     * Returns the shared connection.
     *
     * @throws IOException if the connection could not be created at class-load time
     */
    private static Connection connection() throws IOException {
        if (connection == null) {
            throw new IOException("HBase connection is not available (creation failed at class initialisation)");
        }
        return connection;
    }

    /**
     * Runs {@code scan} against {@code tablename} and returns the {@code toString()}
     * of the list of rows, each row converted with {@link #resultToMap(Result)}.
     */
    private static String runScan(String tablename, Scan scan) throws IOException {
        try (Table table = connection().getTable(TableName.valueOf(tablename));
             ResultScanner scanner = table.getScanner(scan)) {
            List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
            for (Result row : scanner) {
                rows.add(resultToMap(row));
            }
            return rows.toString();
        }
    }

    /**
     * Creates a table.
     *
     * @param tableName    name of the table to create
     * @param columnFamily comma-separated list of column family names
     * @return {@code false} if the table already exists, {@code true} once it has been created
     * @throws Exception if the HBase call fails
     */
    public static boolean create(String tableName, String columnFamily)
            throws Exception {
        TableName name = TableName.valueOf(tableName);
        try (Admin admin = connection().getAdmin()) {
            if (admin.tableExists(name)) {
                System.out.println(tableName + " exists!");
                return false;
            }
            HTableDescriptor tableDesc = new HTableDescriptor(name);
            for (String family : columnFamily.split(",")) {
                tableDesc.addFamily(new HColumnDescriptor(family));
            }
            admin.createTable(tableDesc);
            System.out.println(tableName + " create successfully!");
            return true;
        }
    }

    /**
     * Writes one cell.
     *
     * @param tablename    target table
     * @param row          row key
     * @param columnFamily column family of the cell
     * @param qualifier    column qualifier of the cell
     * @param data         value to store
     * @return always {@code true}; failures are reported as exceptions
     * @throws Exception if the HBase call fails
     */
    public static boolean put(String tablename, String row, String columnFamily,
                              String qualifier, String data) throws Exception {
        try (Table table = connection().getTable(TableName.valueOf(tablename))) {
            Put put = new Put(Bytes.toBytes(row));
            put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(qualifier),
                    Bytes.toBytes(data));
            table.put(put);
        }
        System.out.println("put '" + row + "', '" + columnFamily + ":" + qualifier
                + "', '" + data + "'");
        return true;
    }

    /**
     * Converts a {@link Result} into a nested map.
     *
     * <p>The returned map holds the row key under {@code "rowname"} and, for each
     * column family present in the result, a map from qualifier to value under
     * the family name. Row key, family, qualifier and value are all decoded as
     * strings. An empty result yields a map containing only {@code "rowname"}
     * mapped to the empty string.
     *
     * @param result the row to convert
     * @return the nested map described above
     */
    public static Map<String, Object> resultToMap(Result result) {
        Map<String, Object> resMap = new HashMap<String, Object>();
        Map<String, Map<String, Object>> columnsByFamily = new HashMap<String, Map<String, Object>>();
        // Families in order of first appearance, so they are added to resMap in that order.
        List<String> familyOrder = new ArrayList<String>();
        String rowname = "";

        // listCells() returns null (not an empty list) when the result has no cells.
        List<Cell> cells = result.listCells();
        if (cells != null) {
            for (Cell cell : cells) {
                rowname = Bytes.toString(CellUtil.cloneRow(cell));
                String family = Bytes.toString(CellUtil.cloneFamily(cell));
                String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                String value = Bytes.toString(CellUtil.cloneValue(cell));

                // Group by family directly instead of joining and re-splitting on ':',
                // which fails for empty qualifiers and qualifiers that contain ':'.
                Map<String, Object> columns = columnsByFamily.get(family);
                if (columns == null) {
                    columns = new HashMap<String, Object>();
                    columnsByFamily.put(family, columns);
                    familyOrder.add(family);
                }
                columns.put(qualifier, value);
            }
        }

        resMap.put("rowname", rowname);
        for (String family : familyOrder) {
            resMap.put(family, columnsByFamily.get(family));
        }
        return resMap;
    }

    /**
     * Reads one row.
     *
     * @param tablename table to read from
     * @param row       row key
     * @return {@code toString()} of the row converted with {@link #resultToMap(Result)}
     * @throws Exception if the HBase call fails
     */
    public static String get(String tablename, String row) throws Exception {
        try (Table table = connection().getTable(TableName.valueOf(tablename))) {
            Result result = table.get(new Get(Bytes.toBytes(row)));
            System.out.println("Get: " + result);
            return resultToMap(result).toString();
        }
    }

    /**
     * Scans the whole table.
     *
     * @param tablename table to scan
     * @return {@code toString()} of the list of rows
     * @throws Exception if the HBase call fails
     */
    public static String scan(String tablename) throws Exception {
        return runScan(tablename, new Scan());
    }

    /**
     * Disables and deletes a table.
     *
     * @param tableName table to delete
     * @return {@code false} if disabling or deleting failed; {@code true} otherwise,
     *         including when the table does not exist
     * @throws IOException if the existence check or closing the admin handle fails
     */
    public static boolean delete(String tableName) throws IOException {
        TableName name = TableName.valueOf(tableName);
        try (Admin admin = connection().getAdmin()) {
            if (!admin.tableExists(name)) {
                return true;
            }
            try {
                admin.disableTable(name);
                admin.deleteTable(name);
            } catch (Exception e) {
                e.printStackTrace();
                return false;
            }
            return true;
        }
    }

    /**
     * Removes a column family from a table.
     *
     * @param tableName        table to modify
     * @param columnFamilyName column family to remove
     * @return {@code false} if the removal failed; {@code true} otherwise,
     *         including when the table does not exist
     * @throws IOException if the existence check or closing the admin handle fails
     */
    public static boolean deleteColumnFamily(String tableName, String columnFamilyName) throws IOException {
        TableName name = TableName.valueOf(tableName);
        try (Admin admin = connection().getAdmin()) {
            if (!admin.tableExists(name)) {
                return true;
            }
            try {
                admin.deleteColumn(name, Bytes.toBytes(columnFamilyName));
            } catch (Exception e) {
                e.printStackTrace();
                return false;
            }
            return true;
        }
    }

    /**
     * Deletes a whole row.
     *
     * @param tableName table to modify
     * @param rowName   row key
     * @return {@code false} if the delete failed; {@code true} otherwise,
     *         including when the table does not exist
     * @throws IOException if the existence check or closing a handle fails
     */
    public static boolean deleteRow(String tableName, String rowName) throws IOException {
        TableName name = TableName.valueOf(tableName);
        try (Admin admin = connection().getAdmin();
             Table table = connection().getTable(name)) {
            if (!admin.tableExists(name)) {
                return true;
            }
            try {
                // Bytes.toBytes encodes as UTF-8, matching how put() encodes row keys.
                table.delete(new Delete(Bytes.toBytes(rowName)));
            } catch (Exception e) {
                e.printStackTrace();
                return false;
            }
            return true;
        }
    }

    /**
     * Deletes all versions of one column of a row.
     *
     * @param tableName        table to modify
     * @param rowName          row key
     * @param columnFamilyName column family of the column
     * @param qualifierName    qualifier of the column
     * @return {@code false} if the delete failed; {@code true} otherwise,
     *         including when the table does not exist
     * @throws IOException if the existence check or closing a handle fails
     */
    public static boolean deleteQualifier(String tableName, String rowName, String columnFamilyName, String qualifierName) throws IOException {
        TableName name = TableName.valueOf(tableName);
        try (Admin admin = connection().getAdmin();
             Table table = connection().getTable(name)) {
            if (!admin.tableExists(name)) {
                return true;
            }
            try {
                Delete delete = new Delete(Bytes.toBytes(rowName));
                delete.addColumns(Bytes.toBytes(columnFamilyName), Bytes.toBytes(qualifierName));
                table.delete(delete);
            } catch (Exception e) {
                e.printStackTrace();
                return false;
            }
            return true;
        }
    }

    /**
     * Returns the rows whose {@code columnFamily:qualifier} value equals {@code data}.
     *
     * <p>The filter is used with its default settings, so rows that do not have
     * the column at all are returned as well.
     *
     * @return {@code toString()} of the list of matching rows
     * @throws Exception if the HBase call fails
     */
    public static String queryequal(String tablename, String columnFamily, String qualifier, String data) throws Exception {
        Filter equalsData = new SingleColumnValueFilter(Bytes.toBytes(columnFamily), Bytes.toBytes(qualifier),
                CompareFilter.CompareOp.EQUAL, Bytes.toBytes(data));
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        filterList.addFilter(equalsData);

        return query(tablename, filterList);
    }

    /**
     * Returns the rows whose {@code columnFamily:qualifier} value lies between
     * {@code mindata} and {@code maxdata}, both inclusive.
     *
     * <p>Bounds and stored values are compared as byte strings, not as numbers,
     * so with bounds "40" and "50" a stored "400" also matches.
     *
     * @return {@code toString()} of the list of matching rows
     * @throws Exception if the HBase call fails
     */
    public static String queryagebetween(String tablename, String columnFamily, String qualifier, String mindata, String maxdata) throws Exception {
        Filter atMost = new SingleColumnValueFilter(Bytes.toBytes(columnFamily), Bytes.toBytes(qualifier),
                CompareFilter.CompareOp.LESS_OR_EQUAL, Bytes.toBytes(maxdata));
        Filter atLeast = new SingleColumnValueFilter(Bytes.toBytes(columnFamily), Bytes.toBytes(qualifier),
                CompareFilter.CompareOp.GREATER_OR_EQUAL, Bytes.toBytes(mindata));
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        filterList.addFilter(atMost);
        filterList.addFilter(atLeast);
        return query(tablename, filterList);
    }

    /**
     * Returns every row restricted to the columns whose qualifier starts with {@code prefix}.
     *
     * @return {@code toString()} of the list of rows
     * @throws Exception if the HBase call fails
     */
    public static String querycolumn(String tablename, String prefix) throws Exception {
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        filterList.addFilter(new ColumnPrefixFilter(Bytes.toBytes(prefix)));
        return query(tablename, filterList);
    }

    /**
     * Returns one page of rows, starting at {@code startrowname}.
     *
     * <p>The page size is enforced with a {@link PageFilter}, which is applied
     * per region: when the scan spans several regions, more than {@code count}
     * rows can be returned.
     *
     * @param tablename    table to scan
     * @param startrowname row key at which the scan starts
     * @param count        page size as a decimal string
     * @return {@code toString()} of the list of rows
     * @throws Exception if {@code count} is not a number or the HBase call fails
     */
    public static String queryRowCount(String tablename, String startrowname, String count) throws Exception {
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        filterList.addFilter(new PageFilter(Integer.parseInt(count)));

        Scan scan = new Scan(Bytes.toBytes(startrowname));
        scan.setFilter(filterList);
        return runScan(tablename, scan);
    }

    /**
     * Shared entry point of the query methods: scans the whole table with the
     * given filters.
     *
     * @param tablename  table to scan
     * @param filterList filters to apply to the scan
     * @return {@code toString()} of the list of matching rows
     * @throws Exception if the HBase call fails
     */
    public static String query(String tablename, FilterList filterList) throws Exception {
        Scan scan = new Scan();
        scan.setFilter(filterList);
        return runScan(tablename, scan);
    }

    /**
     * Test helper: writes rows {@code r20} .. {@code r11999} into table
     * {@code test}, each with {@code author:age} set to the row's number.
     *
     * @return always {@code true}; failures are reported as exceptions
     * @throws Exception if the HBase call fails
     */
    public static boolean testInsertdata() throws Exception {
        List<Put> puts = new ArrayList<Put>(TEST_ROW_END - TEST_ROW_START);
        for (int i = TEST_ROW_START; i < TEST_ROW_END; i++) {
            String value = String.valueOf(i);
            Put put = new Put(Bytes.toBytes("r" + value));
            put.addColumn(Bytes.toBytes("author"), Bytes.toBytes("age"),
                    Bytes.toBytes(value));
            puts.add(put);
        }
        // One batched call instead of one round trip per row.
        try (Table table = connection().getTable(TableName.valueOf(TEST_TABLE))) {
            table.put(puts);
        }
        for (int i = TEST_ROW_START; i < TEST_ROW_END; i++) {
            System.out.println("put '" + "r" + String.valueOf(i) + "', '" + "author" + ":" + "age"
                    + "', '" + String.valueOf(i) + "'");
        }
        return true;
    }

    /**
     * Test helper: deletes the rows written by {@link #testInsertdata()}.
     *
     * @return {@code false} if the delete failed; {@code true} otherwise,
     *         including when table {@code test} does not exist
     * @throws Exception if the existence check or closing a handle fails
     */
    public static boolean deleteInsertdata() throws Exception{
        TableName name = TableName.valueOf(TEST_TABLE);
        try (Admin admin = connection().getAdmin();
             Table table = connection().getTable(name)) {
            if (!admin.tableExists(name)) {
                return true;
            }
            try {
                List<Delete> deletes = new ArrayList<Delete>(TEST_ROW_END - TEST_ROW_START);
                for (int i = TEST_ROW_START; i < TEST_ROW_END; i++) {
                    deletes.add(new Delete(Bytes.toBytes("r" + String.valueOf(i))));
                }
                table.delete(deletes);
            } catch (Exception e) {
                e.printStackTrace();
                return false;
            }
            return true;
        }
    }
}

对于新增加的方法进行测试:
1、测试查询某个qualifier为特定值的行。例如:博客作者中年龄是30的内容。
package com.xueyoucto.hbasett;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.PrintWriter;

/**
 * Created by Administrator on 2016-11-24.
 */
@WebServlet(name = "ServletQuery", urlPatterns = "/HBaseServletQueryEqual")
public class ServletQuery extends HttpServlet {
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        doGet(request, response);
    }

    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        String tname = request.getParameter("tablename");
        String columnFamily = request.getParameter("columnfamily");
        String qualifier = request.getParameter("qualifier");
        String data = request.getParameter("data");
        String msg = "-";
        try {
            msg = HBaseUtils.queryequal(tname, columnFamily,qualifier,data);
        } catch (Exception e) {
            e.printStackTrace();
        }

        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");
        PrintWriter out = response.getWriter();
        out.println(msg);
        out.flush();
        out.close();
    }
}

首先看一下所有数据:

查询博客作者年龄是30岁的数据(有脏数据,为了测试后面的功能):

注意:所有的数据必须都有author:age这个qualifier,否则在默认情况下(SingleColumnValueFilter的filterIfMissing默认为false),缺少这个列的行依然会被查出来。
2、测试博客作者年龄在40到50岁之间的内容:
package com.xueyoucto.hbasett;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.PrintWriter;

/**
 * Created by Administrator on 2016-11-24.
 */
@WebServlet(name = "ServletQueryAgeBetween",urlPatterns = "/HBaseServletQueryAgeBetween")
public class ServletQueryAgeBetween extends HttpServlet {
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        doGet(request,response);
    }

    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        String tname = request.getParameter("tablename");
        String columnFamily = request.getParameter("columnfamily");
        String qualifier = request.getParameter("qualifier");
        String mindata = request.getParameter("mindata");
        String maxdata = request.getParameter("maxdata");
        String msg = "-";
        try {
            msg = HBaseUtils.queryagebetween(tname, columnFamily, qualifier, mindata, maxdata);
        } catch (Exception e) {
            e.printStackTrace();
        }

        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");
        PrintWriter out = response.getWriter();
        out.println(msg);
        out.flush();
        out.close();
    }
}
首先是所有数据:

查询:

注意:这里的40和50是按字符串比较的,比较大小的时候是逐字节按字典序(ASCII码)进行的。也就是说,如果有个age=400,那么它依然会被包含在结果中。
3、测试查询后的结果只取指定列(查询blog作者的年龄):
package com.xueyoucto.hbasett;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.PrintWriter;

/**
 * Created by Administrator on 2016-11-24.
 */
@WebServlet(name = "ServletQueryColumn", urlPatterns = "/HBaseServletQueryColumn")
public class ServletQueryColumn extends HttpServlet {
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        doGet(request, response);
    }

    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        String tname = request.getParameter("tablename");
        String prefix = request.getParameter("prefix");
        String msg = "-";
        try {
            msg = HBaseUtils.querycolumn(tname, prefix);
        } catch (Exception e) {
            e.printStackTrace();
        }

        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");
        PrintWriter out = response.getWriter();
        out.println(msg);
        out.flush();
        out.close();
    }
}
首先是全部数据:

执行查询:


4、测试分页(得到从startrow开始的5条数据):
package com.xueyoucto.hbasett;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.PrintWriter;

/**
 * Created by Administrator on 2016-11-24.
 */
@WebServlet(name = "ServletQueryCount", urlPatterns = "/HBaseServletQueryCount")
public class ServletQueryCount extends HttpServlet {
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        doGet(request, response);
    }

    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        String tname = request.getParameter("tablename");
        String startrowname = request.getParameter("startrowname");
        String count = request.getParameter("count");
        String msg = "-";
        try {
            msg = HBaseUtils.queryRowCount(tname, startrowname, count);
        } catch (Exception e) {
            e.printStackTrace();
        }

        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");
        PrintWriter out = response.getWriter();
        out.println(msg);
        out.flush();
        out.close();
    }
}
首先是全部数据:

执行查询:

注意:这里的查询出来的数据有些不对,应该是只有5条记录。
原因是这样的,先看下图:


注意看,对于表test,我把它按照rowkey进行split了,分成了三个Region。
再看下图,PageFilter类的说明:

大概的意思就是说PageFilter是FilterBase的一个实现类,在查询的时候,如果返回的行数达到了filter中设置的pagesize,那么就会停止。需要注意的是,这个filter不能保证返回给client的数目就一定小于等于设置的pagesize。原因是这样的,filter是分别作用在各个region上的,而这些region大部分情况下分布在不同的region server上。但是,如果能够保证某个表的数据都在单独的一个region上,那么返回给client的数据就会小于等于设置的pagesize。

好,那么现在有两种方法保证分页的正确性:
1、保证数据不会被split,也就是数据始终在一个region上。
2、被查询出的数据都在一个region上。

首先这两种方式都可以,各有各的好处。第一种的优点是方便,缺点也很明显,就是可能导致单点负载较高。第二种的优点是负载均衡,缺点是需要根据业务设置rowkey,从而保证同一业务的数据根据设置好的规则split后能够被分配到相同的region中。然后根据startrow和PageFilter设置的pagesize进行分页操作(略了,有兴趣可以自己实现一下)。

源码下载地址:
http://download.csdn.net/detail/wild46cat/9692647
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值