使用 HBase Java API 操作 HBase 数据库：设置查询优化与过滤器

最新推荐文章于 2024-03-20 15:51:19 发布

isOllie

最新推荐文章于 2024-03-20 15:51:19 发布

阅读量443

点赞数 2

分类专栏： HBase 文章标签： HBase 查询优化过滤器 JavaAPI

本文链接：https://blog.csdn.net/qq_39141486/article/details/97826921

版权

HBase 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

在使用 Java 操作 HBase 数据库之前，需要先做好以下前提准备：

创建一个Java项目/Maven项目
导入依赖的Jar包
导入依赖的配置文件

我用的是maven工程

Jar包导入

<properties>
        <!-- Build settings: source file encoding and the Java source/target level. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.7</maven.compiler.source>
        <maven.compiler.target>1.7</maven.compiler.target>
        <!-- Library versions, kept in one place so they can be referenced as properties. -->
        <hadoop.version>2.7.3</hadoop.version>
        <hive.version>1.2.1</hive.version>
        <!-- NOTE(review): this is a CDH build; presumably it needs the Cloudera Maven repository to resolve. Confirm. -->
        <hbase.version>1.2.0-cdh5.7.6</hbase.version>
</properties>

<dependencies>

        <!-- Hadoop client libraries -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- Hive client libraries (service, exec and JDBC driver) -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-service</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <!-- HBase libraries -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
        </dependency>
    </dependencies>

配置文件导入

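（原文此处为配置文件截图，图片已缺失。通常做法是把集群的 hbase-site.xml 等配置文件放到项目的 resources 目录下，使其位于 classpath 中，供 HBaseConfiguration.create() 读取。）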

代码实现

/**
 * Demonstrates a row-key-bounded, filtered scan of the HBase table
 * {@code ns1:sale_orders} and prints every returned cell to standard output.
 *
 * <p>The example shows three things: restricting the scan to a row-key range and
 * a fixed set of columns, applying a value filter, and the client-side tuning
 * knobs {@code setCaching} / {@code setCacheBlocks}.
 */
public class ScanDemo {

    /** Table to scan, in {@code namespace:table} form. */
    private static final String TABLE_NAME = "ns1:sale_orders";

    /** Column family that holds all columns read by this demo. */
    private static final byte[] FAMILY_INFO = Bytes.toBytes("info");
    private static final byte[] COL_DATE = Bytes.toBytes("date");
    private static final byte[] COL_ORDER_AMT = Bytes.toBytes("order_amt");
    private static final byte[] COL_ORDER_ID = Bytes.toBytes("order_id");

    /**
     * Number of rows requested from the region server per RPC. A value of 1
     * costs one round trip per row; a larger value trades client memory for
     * fewer round trips, so it should not be made arbitrarily large.
     */
    private static final int ROWS_PER_RPC = 100;

    /**
     * Opens a connection, scans the table and releases all resources.
     *
     * @param args unused
     * @throws IOException if connecting to HBase or scanning the table fails; the
     *                     failure is propagated so the process ends with an error
     *                     instead of silently exiting successfully
     */
    public static void main(String[] args) throws IOException {
        // 1. Load the HBase configuration.
        Configuration conf = HBaseConfiguration.create();

        // 2./3. Open the connection and the table handle. try-with-resources
        // closes them in reverse order (table first, then connection), even
        // when the scan throws.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf(TABLE_NAME))) {
            // 4. Run the scan.
            scanData(table);
        }
    }

    /**
     * Scans one day of orders for a single row-key prefix and prints each cell as
     * {@code family:qualifier->value}.
     *
     * @param table open handle of the table to scan; not closed by this method
     * @throws IOException if the scanner cannot be opened or a fetch fails
     */
    private static void scanData(Table table) throws IOException {
        Scan scan = new Scan();

        // Row-key range [startRow, stopRow): start is inclusive, stop is exclusive.
        // The narrower the range, the less data the scan has to touch.
        scan.setStartRow(Bytes.toBytes("434017_2015-04-21 00:00:00"));
        scan.setStopRow(Bytes.toBytes("434017_2015-04-22 00:00:00"));

        // Return only these columns. No addFamily() call is needed: adding a
        // column of a family replaces a whole-family selection of that family.
        scan.addColumn(FAMILY_INFO, COL_DATE);
        scan.addColumn(FAMILY_INFO, COL_ORDER_AMT);
        scan.addColumn(FAMILY_INFO, COL_ORDER_ID);

        // Value filter: keep rows whose info:date is >= the given timestamp.
        // The bytes are compared lexicographically.
        // NOTE(review): assumes info:date stores fixed-width "yyyy-MM-dd HH:mm:ss"
        // text, for which byte order equals chronological order. Confirm.
        // NOTE(review): the comparison value is a timestamp, so it is matched
        // against info:date here rather than info:order_amt. Confirm intent.
        // With the filter's default settings, rows that lack the tested column
        // are not filtered out.
        Filter filter = new SingleColumnValueFilter(
                FAMILY_INFO,
                COL_DATE,
                CompareFilter.CompareOp.GREATER_OR_EQUAL,
                Bytes.toBytes("2015-04-21 07:35:10"));
        // Several filters can be combined by adding them to a FilterList and
        // passing that list to setFilter() instead.
        scan.setFilter(filter);

        // Scan.setBatch() (maximum cells per Result) is deliberately not used:
        // it cannot be combined with a filter that decides on whole rows, such
        // as SingleColumnValueFilter, and it would split one row over several
        // Result objects.

        // Tuning 1: rows fetched per RPC.
        scan.setCaching(ROWS_PER_RPC);

        // Tuning 2: whether blocks read by this scan are kept in the region
        // server's block cache. Consider false for one-off scans whose data
        // will not be read again soon.
        scan.setCacheBlocks(true);

        // The scanner must be closed to release its client and server resources.
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                System.out.println("RowKey:" + Bytes.toString(result.getRow()));
                for (Cell cell : result.rawCells()) {
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(family + ":" + qualifier + "->" + value);
                }
                System.out.println("---------------------------------");
            }
        }
    }
}