Prerequisites
This example uses Maven to manage dependencies; the pom.xml is shown below.
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.huadian.bigdata</groupId>
<artifactId>hadoop</artifactId>
<version>1.0-SNAPSHOT</version>
<repositories>
<repository>
<id>aliyun</id>
<url>http://maven.aliyun.com/nexus/content/groups/public/</url>
</repository>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
<repository>
<id>jboss</id>
<url>http://repository.jboss.com/nexus/content/groups/public</url>
</repository>
</repositories>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
<hadoop.version>2.7.3</hadoop.version>
<hive.version>1.2.1</hive.version>
<hbase.version>1.2.0-cdh5.7.6</hbase.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- Hive Client -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
</dependency>
</dependencies>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<!-- see http://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.7.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.20.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
At runtime you also need the hbase-site.xml configuration file from your cluster; place it under src/main/resources so that HBaseConfiguration.create() can find it on the classpath.
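If you would rather not ship a config file, the same settings can also be supplied in code. Below is a minimal sketch; the ZooKeeper host names are hypothetical, so replace them with your cluster's actual quorum:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
public class ConfSketch {
    public static void main(String[] args) {
        //HBaseConfiguration.create() merges hbase-default.xml and any hbase-site.xml on the classpath
        Configuration conf = HBaseConfiguration.create();
        //programmatic alternative (host names here are hypothetical)
        conf.set( "hbase.zookeeper.quorum", "node1,node2,node3" );
        conf.set( "hbase.zookeeper.property.clientPort", "2181" );
        System.out.println( conf.get( "hbase.zookeeper.quorum" ) );
    }
}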
Querying a single row (Get)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IOUtils;
import java.io.IOException;
public class A_GetDemo {
public static void main(String[] args) {
//1. Load the configuration (reads hbase-site.xml from the classpath)
Configuration conf = HBaseConfiguration.create();
//System.out.println(conf);
Connection conn = null;
Table table = null;
try {
//2. Open the connection
conn = ConnectionFactory.createConnection( conf );
//3. Get a handle to the HBase table; it supports CRUD operations on the table's data
table = conn.getTable( TableName.valueOf( "namespace:tableName" ) );
//4. Create a Get object for the target row key
Get get = new Get( Bytes.toBytes( "rowkey" ) );
//5. Fetch the row by its row key
Result result = table.get( get );
//6. Parse the result
System.out.println("RowKey:"+Bytes.toString( result.getRow() ));
for (Cell cell:result.rawCells()) {
String cf = Bytes.toString(CellUtil.cloneFamily( cell ));
String field = Bytes.toString(CellUtil.cloneQualifier( cell ));
String value = Bytes.toString(CellUtil.cloneValue( cell ));
System.out.println(cf+":"+field +"->" +value);
}
} catch (IOException e) {
e.printStackTrace();
}finally {
//Release resources
IOUtils.closeStream( table );
IOUtils.closeStream( conn);
}
}
}
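A Get can also be narrowed to specific columns so the whole row is not transferred. A small sketch under the same connection setup; the row key and column names are illustrative:
private static void getColumn(Table table) throws IOException {
    Get get = new Get( Bytes.toBytes( "100001" ) );
    //only fetch info:name instead of the entire row
    get.addColumn( Bytes.toBytes( "info" ), Bytes.toBytes( "name" ) );
    Result result = table.get( get );
    if (result.isEmpty()) {
        System.out.println( "row not found" );
        return;
    }
    byte[] value = result.getValue( Bytes.toBytes( "info" ), Bytes.toBytes( "name" ) );
    System.out.println( "info:name -> " + Bytes.toString( value ) );
}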
Scanning a range of rows (Scan)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IOUtils;
import java.io.IOException;
public class B_ScanDemo {
public static void main(String[] args) {
//1. Load the configuration
Configuration conf = HBaseConfiguration.create();
//System.out.println(conf);
Connection conn = null;
Table table = null;
try {
//2. Open the connection
conn = ConnectionFactory.createConnection( conf );
//3. Get a handle to the HBase table; it supports CRUD operations on the table's data
table = conn.getTable( TableName.valueOf( "ns1:sale_orders" ) );
//4. Scan the data
scanData(table);
} catch (IOException e) {
e.printStackTrace();
}finally {
IOUtils.closeStream( table );
IOUtils.closeStream( conn);
}
}
private static void scanData(Table table) {
//Create a Scan instance
Scan scan = new Scan();
/**
 * Scan range
 * Row keys are matched by prefix (lexicographic byte order)
 * The returned range is [startRow, stopRow); the narrower the range, the faster the scan
 */
scan.setStartRow( Bytes.toBytes( "startRowKey" ) );
scan.setStopRow( Bytes.toBytes( "stopRowKey" ) );
/**
 * Restrict the scan to specific column families and columns
 */
scan.addFamily( Bytes.toBytes("info") );
scan.addColumn( Bytes.toBytes("info"),Bytes.toBytes("date") );
scan.addColumn( Bytes.toBytes("info"),Bytes.toBytes("order_amt") );
scan.addColumn( Bytes.toBytes("info"),Bytes.toBytes("order_id") );
//Parse the results
try {
ResultScanner scanner = table.getScanner( scan );
for (Result result:scanner) {
//Parse each row
System.out.println("RowKey:"+Bytes.toString( result.getRow() ));
for (Cell cell:result.rawCells()) {
String cf = Bytes.toString(CellUtil.cloneFamily( cell ));
String field = Bytes.toString(CellUtil.cloneQualifier( cell ));
String value = Bytes.toString(CellUtil.cloneValue( cell ));
System.out.println(cf+":"+field +"->" +value);
}
System.out.println("---------------------------------");
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
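Because row keys sort lexicographically, scanning everything that shares a prefix is a common shortcut. Below is a sketch using Scan#setRowPrefixFilter from the HBase 1.x client API, which derives the exclusive stop row from the prefix automatically; the prefix value is illustrative:
private static void scanByPrefix(Table table) throws IOException {
    Scan scan = new Scan();
    //equivalent to setStartRow(prefix) plus the matching exclusive stop row
    scan.setRowPrefixFilter( Bytes.toBytes( "434017_" ) );
    try (ResultScanner scanner = table.getScanner( scan )) {
        for (Result result : scanner) {
            System.out.println( "RowKey:" + Bytes.toString( result.getRow() ) );
        }
    }
}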
Filtering query results
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IOUtils;
import java.io.IOException;
public class B_ScanDemo {
public static void main(String[] args) {
//1. Load the configuration
Configuration conf = HBaseConfiguration.create();
//System.out.println(conf);
Connection conn = null;
Table table = null;
try {
//2. Open the connection
conn = ConnectionFactory.createConnection( conf );
//3. Get a handle to the HBase table; it supports CRUD operations on the table's data
table = conn.getTable( TableName.valueOf( "ns1:sale_orders" ) );
//4. Scan the data
scanData(table);
} catch (IOException e) {
e.printStackTrace();
}finally {
IOUtils.closeStream( table );
IOUtils.closeStream( conn);
}
}
private static void scanData(Table table) {
//Create a Scan instance
Scan scan = new Scan();
/**
 * Scan range
 * Row keys are matched by prefix (lexicographic byte order)
 * The returned range is [startRow, stopRow); the narrower the range, the faster the scan
 */
scan.setStartRow( Bytes.toBytes( "434017_2015-04-21 00:00:00" ) );
scan.setStopRow( Bytes.toBytes( "434017_2015-04-22 00:00:00" ) );
/**
 * Restrict the scan to specific column families and columns
 */
scan.addFamily( Bytes.toBytes("info") );
scan.addColumn( Bytes.toBytes("info"),Bytes.toBytes("date") );
scan.addColumn( Bytes.toBytes("info"),Bytes.toBytes("order_amt") );
scan.addColumn( Bytes.toBytes("info"),Bytes.toBytes("order_id") );
/**
 * Set a filter via scan.setFilter( filter ).
 * A value filter: a row matches only when the given column's value satisfies the comparison
 */
Filter filter = new SingleColumnValueFilter(
Bytes.toBytes("columnFamily"),
Bytes.toBytes("qualifierToFilter"),
CompareFilter.CompareOp.GREATER_OR_EQUAL, //greater than or equal
Bytes.toBytes("valueToCompare")
);
scan.setFilter( filter );
/*
To combine multiple filters:
FilterList filterList = new FilterList();
filterList.addFilter( filter );
scan.setFilter( filterList );
*/
try {
ResultScanner scanner = table.getScanner( scan );
for (Result result:scanner) {
//Parse each row
System.out.println("RowKey:"+Bytes.toString( result.getRow() ));
for (Cell cell:result.rawCells()) {
String cf = Bytes.toString(CellUtil.cloneFamily( cell ));
String field = Bytes.toString(CellUtil.cloneQualifier( cell ));
String value = Bytes.toString(CellUtil.cloneValue( cell ));
System.out.println(cf+":"+field +"->" +value);
}
System.out.println("---------------------------------");
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
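One caveat: by default SingleColumnValueFilter still lets through rows that do not contain the tested column at all, and the comparison is byte-lexicographic, so string-encoded numbers only compare correctly when zero-padded. The sketch below, using the same illustrative columns, shows setFilterIfMissing together with a FilterList that ANDs two conditions:
private static void applyFilters(Scan scan) {
    SingleColumnValueFilter amtFilter = new SingleColumnValueFilter(
            Bytes.toBytes( "info" ), Bytes.toBytes( "order_amt" ),
            CompareFilter.CompareOp.GREATER_OR_EQUAL, Bytes.toBytes( "100" ) );
    //skip rows that lack info:order_amt entirely (the default is to include them)
    amtFilter.setFilterIfMissing( true );
    SingleColumnValueFilter idFilter = new SingleColumnValueFilter(
            Bytes.toBytes( "info" ), Bytes.toBytes( "order_id" ),
            CompareFilter.CompareOp.NOT_EQUAL, Bytes.toBytes( "" ) );
    idFilter.setFilterIfMissing( true );
    //MUST_PASS_ALL = logical AND; MUST_PASS_ONE = logical OR
    FilterList filterList = new FilterList( FilterList.Operator.MUST_PASS_ALL );
    filterList.addFilter( amtFilter );
    filterList.addFilter( idFilter );
    scan.setFilter( filterList );
}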
Tuning scan performance
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IOUtils;
import java.io.IOException;
public class B_ScanDemo {
public static void main(String[] args) {
//1. Load the configuration
Configuration conf = HBaseConfiguration.create();
//System.out.println(conf);
Connection conn = null;
Table table = null;
try {
//2. Open the connection
conn = ConnectionFactory.createConnection( conf );
//3. Get a handle to the HBase table; it supports CRUD operations on the table's data
table = conn.getTable( TableName.valueOf( "ns1:sale_orders" ) );
//4. Scan the data
scanData(table);
} catch (IOException e) {
e.printStackTrace();
}finally {
IOUtils.closeStream( table );
IOUtils.closeStream( conn);
}
}
private static void scanData(Table table) {
//Create a Scan instance
Scan scan = new Scan();
/**
 * Scan range
 * Row keys are matched by prefix (lexicographic byte order)
 * The returned range is [startRow, stopRow); the narrower the range, the faster the scan
 */
scan.setStartRow( Bytes.toBytes( "434017_2015-04-21 00:00:00" ) );
scan.setStopRow( Bytes.toBytes( "434017_2015-04-22 00:00:00" ) );
/**
 * Restrict the scan to specific column families and columns
 */
scan.addFamily( Bytes.toBytes("info") );
scan.addColumn( Bytes.toBytes("info"),Bytes.toBytes("date") );
scan.addColumn( Bytes.toBytes("info"),Bytes.toBytes("order_amt") );
scan.addColumn( Bytes.toBytes("info"),Bytes.toBytes("order_id") );
/**
 * Set a filter via scan.setFilter( filter ).
 * A value filter: a row matches only when the given column's value satisfies the comparison
 */
Filter filter = new SingleColumnValueFilter(
Bytes.toBytes("info"),
Bytes.toBytes("date"),
CompareFilter.CompareOp.GREATER_OR_EQUAL, //greater than or equal (byte-lexicographic, which works for this date format)
Bytes.toBytes("2015-04-21 07:35:10")
);
scan.setFilter( filter );
/*
To combine multiple filters:
FilterList filterList = new FilterList();
filterList.addFilter( filter );
scan.setFilter( filterList );
*/
/*****************************Scan tuning*****************************/
//Option 1: batch size -- how many columns of one row each Result carries; default -1 (unset).
//Batching is incompatible with row-level filters such as the SingleColumnValueFilter above,
//so it stays commented out here; use it only on filter-free scans over very wide rows
//scan.setBatch( 2 );
//Option 2: caching -- how many rows each RPC fetches from the RegionServer (the default varies
//by HBase version). Do not set it too large, since the fetched rows are buffered in client memory.
//For example, with caching=1000 one RPC pulls 1000 rows from the RegionServer; if the client
//only consumes 100 now, the remaining 900 are served from the client-side cache on later calls
scan.setCaching( 1 );
//Option 3: whether blocks read by this scan are kept in the RegionServer block cache.
//For a one-off scan like this one, disable it so the scan does not evict hot data
scan.setCacheBlocks( false );
try {
ResultScanner scanner = table.getScanner( scan );
for (Result result:scanner) {
//Parse each row
System.out.println("RowKey:"+Bytes.toString( result.getRow() ));
for (Cell cell:result.rawCells()) {
String cf = Bytes.toString(CellUtil.cloneFamily( cell ));
String field = Bytes.toString(CellUtil.cloneQualifier( cell ));
String value = Bytes.toString(CellUtil.cloneValue( cell ));
System.out.println(cf+":"+field +"->" +value);
}
System.out.println("---------------------------------");
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
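As a rule of thumb (a hedged sketch, not a universal recipe): for a one-off batch scan, raise caching moderately, cap the bytes a single RPC may return, leave batching to filter-free scans over very wide rows, and keep block caching off:
private static void tuneBatchScan(Scan scan) {
    //fewer RPC round-trips; the fetched rows wait in client memory
    scan.setCaching( 100 );
    //cap the bytes a single RPC may return so wide rows cannot exhaust the client heap
    scan.setMaxResultSize( 2 * 1024 * 1024 );
    //a one-off scan should not evict hot data from the RegionServer block cache
    scan.setCacheBlocks( false );
}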
Creating a compressed, pre-split table
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IOUtils;
import java.io.IOException;
public class C_CreateTableDemo {
public static void main(String[] args) {
//1. Load the configuration
Configuration conf = HBaseConfiguration.create();
//System.out.println(conf);
Connection conn = null;
Table table = null;
HBaseAdmin admin = null;
try {
//2. Open the connection
conn = ConnectionFactory.createConnection( conf );
admin = (HBaseAdmin) conn.getAdmin();
//Create the namespace (createNamespace throws an exception if it already exists)
String namespace = "ns2";
NamespaceDescriptor nsDesc = NamespaceDescriptor.create( namespace ).build();
admin.createNamespace( nsDesc );
/**
 * Create the table: name and column family,
 * with compression, pre-split regions, and a BLOCKCACHE setting
 */
String tableName = namespace+":stu_info";
//If the table already exists, drop it
if(admin.tableExists( tableName )){
//disable it first
admin.disableTable( tableName );
//then delete it
admin.deleteTable( tableName );
}
//HTableDescriptor: table name + column families
//a. table descriptor
HTableDescriptor desc = new HTableDescriptor( TableName.valueOf( tableName ) );
//b. column family descriptor
HColumnDescriptor family = new HColumnDescriptor( "info" );
//Column family properties
//Property 1: whether blocks read from this family are cached; often false for user-defined tables
family.setBlockCacheEnabled( false );
//Property 2: compression (SNAPPY support must be installed on the cluster)
family.setCompressionType( Compression.Algorithm.SNAPPY );
//Property 3: min/max versions kept per cell; reads return the newest version by default.
//The default max is 1, so only one version of each cell is retained
family.setVersions( 1,3 );
//Attach the column family to the table
desc.addFamily( family );
//Pre-split the table into five regions at these split keys
byte[][] splitKeys = {
Bytes.toBytes("1"),Bytes.toBytes("3"),
Bytes.toBytes("5"),Bytes.toBytes("8")
};
admin.createTable( desc,splitKeys );
} catch (IOException e) {
e.printStackTrace();
}finally {
IOUtils.closeStream( admin);
IOUtils.closeStream( conn);
}
}
}
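To confirm the pre-splits took effect, the regions can be listed right after creation. A sketch using HBaseAdmin#getTableRegions from the 1.x API (HRegionInfo is covered by the org.apache.hadoop.hbase.* import above):
private static void printRegions(HBaseAdmin admin, String tableName) throws IOException {
    for (HRegionInfo region : admin.getTableRegions( TableName.valueOf( tableName ) )) {
        System.out.println( "start=" + Bytes.toString( region.getStartKey() )
                + " end=" + Bytes.toString( region.getEndKey() ) );
    }
}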
Inserting and deleting data
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IOUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class D_PutDemo {
public static void main(String[] args) {
//1. Load the configuration
Configuration conf = HBaseConfiguration.create();
//System.out.println(conf);
Connection conn = null;
Table table = null;
try {
//2. Open the connection
conn = ConnectionFactory.createConnection( conf );
//3. Get a handle to the HBase table; it supports CRUD operations on the table's data
table = conn.getTable( TableName.valueOf( "ns2:stu_info" ) );
//putData(table);  //run this first to insert the sample row
delete(table);
} catch (IOException e) {
e.printStackTrace();
}finally {
IOUtils.closeStream( table );
IOUtils.closeStream( conn);
}
}
/**
 * Insert data into the table.
 * Use case: store all columns of one record (one row) in HBase at once,
 * e.g. adding a user (id, name, age)
 */
private static void putData(Table table) {
//Mock data
HashMap<String, String> stuMap = new HashMap<>();
stuMap.put( "id","1122" );
stuMap.put( "name","zs22" );
stuMap.put( "age","1722" );
stuMap.put( "address","zhejaingjiaxing22" );
//Insert the data with a Put
//shell equivalent: put 'ns2:stu_info', rowkey, 'CF:Column', value
Put put = new Put(Bytes.toBytes( "100001" ));
byte [] cf = Bytes.toBytes( "info" );
for (Map.Entry<String,String> entry:stuMap.entrySet()) {
put.addColumn( cf, Bytes.toBytes( entry.getKey() ), Bytes.toBytes( entry.getValue() ) );
}
try {
//Single-row insert
table.put( put );
//For multi-row inserts use Table#put(List<Put>);
//see the putBatch sketch after this method
} catch (IOException e) {
e.printStackTrace();
}
}
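/**
 * A sketch of the multi-row variant mentioned above: Table#put(List<Put>) sends
 * all Puts in one batch, which is far cheaper than one RPC per row.
 * Row keys and values below are illustrative.
 */
private static void putBatch(Table table) {
    List<Put> puts = new ArrayList<>();
    for (int i = 1; i <= 3; i++) {
        Put put = new Put( Bytes.toBytes( "10000" + i ) );
        put.addColumn( Bytes.toBytes( "info" ), Bytes.toBytes( "name" ), Bytes.toBytes( "user" + i ) );
        puts.add( put );
    }
    try {
        table.put( puts );
    } catch (IOException e) {
        e.printStackTrace();
    }
}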
//Deleting data
/**
 * An HBase table has no immediate physical delete (running the command does not
 * remove the data right away); instead the cell is marked with a tombstone, so
 * queries no longer see it.
 *
 * The actual removal happens when each region runs a major compaction.
 * To keep small files from piling up, HBase merges small store files into
 * larger ones when needed; this process is called compaction.
 * MajorCompaction: rewrites all store files of a store into one and purges
 * deleted and expired cells
 * minorCompaction: merges a few small store files without purging deletes
 *
 * @param table
 */
private static void delete(Table table){
Delete delete = new Delete(Bytes.toBytes( "100001" ));
//delete.addFamily( Bytes.toBytes( "info" ) );  //would delete the whole column family
delete.addColumn( Bytes.toBytes( "info" ),Bytes.toBytes( "id" ) );  //deletes only the newest version of info:id
try {
table.delete( delete );
} catch (IOException e) {
e.printStackTrace();
}
}
}
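Also note the difference between the two column-delete calls: Delete#addColumn (singular, used above) marks only the newest version of a cell, so an older version can resurface after the delete, while Delete#addColumns (plural) marks every version. A short sketch:
private static void deleteAllVersions(Table table) throws IOException {
    Delete delete = new Delete( Bytes.toBytes( "100001" ) );
    //addColumns (plural) marks every version of info:name for deletion,
    //unlike addColumn (singular), which only marks the newest version
    delete.addColumns( Bytes.toBytes( "info" ), Bytes.toBytes( "name" ) );
    table.delete( delete );
}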