hbaseAPI1714145080

HBasejava代码开发

熟练掌握通过使用java代码实现HBase数据库当中的数据增删改查的操作,特别是各种查询,熟练运用

第一步:创建maven工程,导入jar包

<!-- Cloudera repository: hosts the CDH-flavoured Hadoop/HBase artifacts used below. -->
<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
</repositories>
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.0-mr1-cdh5.14.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>1.2.0-cdh5.14.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>1.2.0-cdh5.14.0</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.testng</groupId>
        <artifactId>testng</artifactId>
        <version>6.14.3</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.junit.jupiter</groupId>
        <artifactId>junit-jupiter-api</artifactId>
        <!-- Pinned instead of the RELEASE meta-version so the build is reproducible
             and keeps resolving a JUnit line that still supports Java 8. -->
        <version>5.8.2</version>
        <scope>compile</scope>
    </dependency>
</dependencies>
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.0</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
                <encoding>UTF-8</encoding>
                <!-- <verbal>true</verbal>-->
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>2.2</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <filters>
                            <filter>
                                <artifact>*:*</artifact>
                                <!-- Strip jar signature files from every shaded artifact. -->
                                <excludes>
                                    <exclude>META-INF/*.SF</exclude>
                                    <exclude>META-INF/*.DSA</exclude>
                                    <exclude>META-INF/*.RSA</exclude>
                                </excludes>
                            </filter>
                        </filters>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>


 

 

 

 

 

 

 

第二步:开发javaAPI操作HBase表数据

1、创建myuser表

public static   void createTable() throws IOException {
     Configuration conf =new Configuration();
    //连接hbase集群不需要指定hbase主节点的ip地址和端口号
    conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
     //创建连接对象
    Connection connection = ConnectionFactory.createConnection(conf);
    //获取连接对象,创建一张表
    //获取管理员对象,来对数据库进行DDL的操作
    Admin admin = connection.getAdmin();
     //指定我们的表名
    TableName myuser = TableName.valueOf("myuser");
    HTableDescriptor hTableDescriptor = new HTableDescriptor(myuser);
     //指定两个列族
    HColumnDescriptor f1 = new HColumnDescriptor("f1");
    HColumnDescriptor f2 = new HColumnDescriptor("f2");
    hTableDescriptor.addFamily(f1);
    hTableDescriptor.addFamily(f2);

    admin.createTable(hTableDescriptor);
    admin.close();
    connection.close();

}

2、向表中添加数据

/**

     * 插入数据

     */ 

    public  void  addDatas() throws IOException {

        //获取连接

      Configuration conf =new Configuration();

 

        conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        //获取表

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

        //创建put对象,并指定rowkey

        Put put = new Put("0001".getBytes());

        put.addColumn("f1".getBytes(),"id".getBytes(), Bytes.toBytes(1));

        put.addColumn("f1".getBytes(),"name".getBytes(), Bytes.toBytes("张三"));

        put.addColumn("f1".getBytes(),"age".getBytes(), Bytes.toBytes(18));

 

        put.addColumn("f2".getBytes(),"address".getBytes(), Bytes.toBytes("地球人"));

        put.addColumn("f2".getBytes(),"phone".getBytes(), Bytes.toBytes("15874102589"));

        //插入数据

        myuser.put(put);

        //关闭表

        myuser.close();

 

    }

 

3、查询数据

初始化一批数据到HBase当中,用于查询

    public void insertBatchData() throws IOException {

 

        //获取连接

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        //获取表

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

        //创建put对象,并指定rowkey

        Put put = new Put("0002".getBytes());

        put.addColumn("f1".getBytes(),"id".getBytes(),Bytes.toBytes(1));

        put.addColumn("f1".getBytes(),"name".getBytes(),Bytes.toBytes("曹操"));

        put.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(30));

        put.addColumn("f2".getBytes(),"sex".getBytes(),Bytes.toBytes("1"));

        put.addColumn("f2".getBytes(),"address".getBytes(),Bytes.toBytes("沛国谯县"));

        put.addColumn("f2".getBytes(),"phone".getBytes(),Bytes.toBytes("16888888888"));

        put.addColumn("f2".getBytes(),"say".getBytes(),Bytes.toBytes("helloworld"));

 

        Put put2 = new Put("0003".getBytes());

        put2.addColumn("f1".getBytes(),"id".getBytes(),Bytes.toBytes(2));

        put2.addColumn("f1".getBytes(),"name".getBytes(),Bytes.toBytes("刘备"));

        put2.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(32));

        put2.addColumn("f2".getBytes(),"sex".getBytes(),Bytes.toBytes("1"));

        put2.addColumn("f2".getBytes(),"address".getBytes(),Bytes.toBytes("幽州涿郡涿县"));

        put2.addColumn("f2".getBytes(),"phone".getBytes(),Bytes.toBytes("17888888888"));

        put2.addColumn("f2".getBytes(),"say".getBytes(),Bytes.toBytes("talk is cheap , show me the code"));

 

 

        Put put3 = new Put("0004".getBytes());

        put3.addColumn("f1".getBytes(),"id".getBytes(),Bytes.toBytes(3));

        put3.addColumn("f1".getBytes(),"name".getBytes(),Bytes.toBytes("孙权"));

        put3.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(35));

        put3.addColumn("f2".getBytes(),"sex".getBytes(),Bytes.toBytes("1"));

        put3.addColumn("f2".getBytes(),"address".getBytes(),Bytes.toBytes("下邳"));

        put3.addColumn("f2".getBytes(),"phone".getBytes(),Bytes.toBytes("12888888888"));

        put3.addColumn("f2".getBytes(),"say".getBytes(),Bytes.toBytes("what are you 弄啥嘞!"));

 

        Put put4 = new Put("0005".getBytes());

        put4.addColumn("f1".getBytes(),"id".getBytes(),Bytes.toBytes(4));

        put4.addColumn("f1".getBytes(),"name".getBytes(),Bytes.toBytes("诸葛亮"));

        put4.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(28));

        put4.addColumn("f2".getBytes(),"sex".getBytes(),Bytes.toBytes("1"));

        put4.addColumn("f2".getBytes(),"address".getBytes(),Bytes.toBytes("四川隆中"));

        put4.addColumn("f2".getBytes(),"phone".getBytes(),Bytes.toBytes("14888888888"));

        put4.addColumn("f2".getBytes(),"say".getBytes(),Bytes.toBytes("出师表你背了嘛"));

 

        Put put5 = new Put("0005".getBytes());

        put5.addColumn("f1".getBytes(),"id".getBytes(),Bytes.toBytes(5));

        put5.addColumn("f1".getBytes(),"name".getBytes(),Bytes.toBytes("司马懿"));

        put5.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(27));

        put5.addColumn("f2".getBytes(),"sex".getBytes(),Bytes.toBytes("1"));

        put5.addColumn("f2".getBytes(),"address".getBytes(),Bytes.toBytes("哪里人有待考究"));

        put5.addColumn("f2".getBytes(),"phone".getBytes(),Bytes.toBytes("15888888888"));

        put5.addColumn("f2".getBytes(),"say".getBytes(),Bytes.toBytes("跟诸葛亮死掐"));

 

 

        Put put6 = new Put("0006".getBytes());

        put6.addColumn("f1".getBytes(),"id".getBytes(),Bytes.toBytes(5));

        put6.addColumn("f1".getBytes(),"name".getBytes(),Bytes.toBytes("xiaobubu—吕布"));

        put6.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(28));

        put6.addColumn("f2".getBytes(),"sex".getBytes(),Bytes.toBytes("1"));

        put6.addColumn("f2".getBytes(),"address".getBytes(),Bytes.toBytes("内蒙人"));

        put6.addColumn("f2".getBytes(),"phone".getBytes(),Bytes.toBytes("15788888888"));

        put6.addColumn("f2".getBytes(),"say".getBytes(),Bytes.toBytes("貂蝉去哪了"));

 

        List<Put> listPut = new ArrayList<Put>();

        listPut.add(put);

        listPut.add(put2);

        listPut.add(put3);

        listPut.add(put4);

        listPut.add(put5);

        listPut.add(put6);

 

        myuser.put(listPut);

        myuser.close();

    }

 

 

 

按照rowkey进行查询,获取所有列的所有值

查询主键rowkey为0003的人

/**

     * 查询数据,按照主键id进行查询

     */

     

    public  void searchData() throws IOException {

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

 

        Get get = new Get(Bytes.toBytes("0003"));

        Result result = myuser.get(get);

        Cell[] cells = result.rawCells();

        //获取所有的列名称以及列的值

        for (Cell cell : cells) {

            //注意,如果列属性是int类型,那么这里就不会显示

            

if (Bytes.toString(CellUtil.cloneQualifier(cell)).equals("id" )|| Bytes.toString(CellUtil.cloneQualifier(cell)).equals("age")){
    System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
    System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
    System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
    System.out.println(Bytes.toInt(CellUtil.cloneValue(cell)));
}else {
    System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
    System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
    System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
    System.out.println(Bytes.toString(CellUtil.cloneValue(cell)));
}

        }

 

        myuser.close();

}

 

 

按照rowkey查询指定列族下面的指定列的值

 public  static void searchdata2() throws IOException{
    Configuration conf=new Configuration();
    conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
    Connection connection = ConnectionFactory.createConnection(conf);

    Table myuser = connection.getTable(TableName.valueOf("myuser"));

    Get get=new Get("0003".getBytes());

// get.addFamily("f1".getBytes());


    get.addColumn("f1".getBytes(),"name".getBytes());
    Result result = myuser.get(get);
    Cell[] cells = result.rawCells();
    for (Cell cell : cells) {
        if (Bytes.toString(CellUtil.cloneQualifier(cell)).equals("id" )|| Bytes.toString(CellUtil.cloneQualifier(cell)).equals("age")){
            System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
            System.out.println(Bytes.toInt(CellUtil.cloneValue(cell)));
        }else {
            System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
            System.out.println(Bytes.toString(CellUtil.cloneValue(cell)));
        }


    }


}

 

 

 

 

 

 

通过startRowKey和endRowKey进行扫描

/**

     * 通过startRowKey和endRowKey进行扫描查询

     */

        public  static void scanrowkey() throws IOException{
        Configuration conf=new Configuration();
        conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
        Connection connection = ConnectionFactory.createConnection(conf);

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

        Scan scan =new Scan();
        scan.setStartRow("0002".getBytes());
        scan.setStopRow("0006".getBytes());

        ResultScanner scanner = myuser.getScanner(scan);
        for (Result result : scanner) {
            System.out.println("rowkey   "+Bytes.toString(result.getRow()));
/*
            System.out.println(Bytes.toInt(result.getValue("f1".getBytes(),"id".getBytes())));
            System.out.println(Bytes.toString(result.getValue("f1".getBytes(),"name".getBytes())));
            System.out.println(Bytes.toInt(result.getValue("f1".getBytes(),"age".getBytes())));
*/

            Cell[] cells = result.rawCells();
            for (Cell cell : cells) {
                if (Bytes.toString(CellUtil.cloneQualifier(cell)).equals("id" )|| Bytes.toString(CellUtil.cloneQualifier(cell)).equals("age")){
                    System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
                    System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
                    System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
                    System.out.println(Bytes.toInt(CellUtil.cloneValue(cell)));
                }else {
                    System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
                    System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
                    System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
                    System.out.println(Bytes.toString(CellUtil.cloneValue(cell)));
                }
            }


        }

    }

 

 

通过scan进行表扫描

 

/**

     * 全表扫描

     */

    public  static void scanrow() throws IOException{
    Configuration conf=new Configuration();
    conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
    Connection connection = ConnectionFactory.createConnection(conf);

    Table myuser = connection.getTable(TableName.valueOf("myuser"));

    Scan scan =new Scan();

    ResultScanner scanner = myuser.getScanner(scan);
    for (Result result : scanner) {
        System.out.println("rowkey   "+Bytes.toString(result.getRow()));
        System.out.println(Bytes.toInt(result.getValue("f1".getBytes(),"id".getBytes())));
        System.out.println(Bytes.toString(result.getValue("f1".getBytes(),"name".getBytes())));
        System.out.println(Bytes.toInt(result.getValue("f1".getBytes(),"age".getBytes())));

   /*     Cell[] cells = result.rawCells();
        for (Cell cell : cells) {
            if (Bytes.toString(CellUtil.cloneQualifier(cell)).equals("id" )|| Bytes.toString(CellUtil.cloneQualifier(cell)).equals("age")){
                System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
                System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
                System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
                System.out.println(Bytes.toInt(CellUtil.cloneValue(cell)));
            }else {
                System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
                System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
                System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
                System.out.println(Bytes.toString(CellUtil.cloneValue(cell)));
            }
        }*/


    }

}

 

 

 

4、过滤器查询

过滤器的类型很多,但是可以分为两大类——比较过滤器,专用过滤器

过滤器的作用是在服务端判断数据是否满足条件,然后只将满足条件的数据返回给客户端;

 

hbase过滤器的比较运算符:

LESS  <

LESS_OR_EQUAL <=

EQUAL =

NOT_EQUAL <>

GREATER_OR_EQUAL >=

GREATER >

NO_OP 排除所有

 

HBase过滤器的比较器(指定比较机制):

BinaryComparator  按字节索引顺序比较指定字节数组,采用Bytes.compareTo(byte[])

BinaryPrefixComparator 跟前面相同,只是比较左端的数据是否相同

NullComparator 判断给定的是否为空

BitComparator 按位比较

RegexStringComparator 提供一个正则的比较器,仅支持 EQUAL 和非EQUAL

SubstringComparator 判断提供的子串是否出现在value中。

 

1、比较过滤器

1、rowKey过滤器RowFilter

通过RowFilter过滤出rowKey小于等于0003的所有数据

 

/**

     * hbase行键过滤器RowFilter

     */

     

    public  void rowKeyFilter() throws IOException {

        //获取连接

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

 

        Scan scan = new Scan();

        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("0003")));

 

        scan.setFilter(rowFilter);

        ResultScanner resultScanner = myuser.getScanner(scan);

        for (Result result : resultScanner) {

            //获取rowkey

            System.out.println(Bytes.toString(result.getRow()));

 

            //指定列族以及列打印列当中的数据出来

            System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));

            System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "age".getBytes())));

            System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));

        }

        myuser.close();

 

    }

 

2、列族过滤器FamilyFilter

查询比f2列族小的所有的列族内的数据

/**

     * hbase列族过滤器FamilyFilter

     */

     

    public  void familyFilter() throws IOException {

        //获取连接

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

        Scan scan = new Scan();

        FamilyFilter familyFilter = new FamilyFilter(CompareFilter.CompareOp.LESS, new SubstringComparator("f2"));

        scan.setFilter(familyFilter);

        ResultScanner resultScanner = myuser.getScanner(scan);

        for (Result result : resultScanner) {

            //获取rowkey

            System.out.println(Bytes.toString(result.getRow()));

            //指定列族以及列打印列当中的数据出来

            System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));

            System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "age".getBytes())));

            System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));

        }

        myuser.close();

    }

 

3、列过滤器QualifierFilter

只查询name列的值

/**

     * hbase列过滤器

     */

     

    public  void qualifierFilter() throws IOException {

        //获取连接

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

        Scan scan = new Scan();

        QualifierFilter qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("name"));

        scan.setFilter(qualifierFilter);

        ResultScanner resultScanner = myuser.getScanner(scan);

        for (Result result : resultScanner) {

            //获取rowkey

            System.out.println(Bytes.toString(result.getRow()));

            //指定列族以及列打印列当中的数据出来

        //    System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));

            System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));

        }

        myuser.close();

    }

 

 

4、列值过滤器ValueFilter

查询所有列当中包含8的数据

/**

     * hbase值过滤器

     * 查询包含8的列值

     */

     

    public  void valueFilter() throws IOException {

        //获取连接

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

        Scan scan = new Scan();

        ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("8"));

 

        scan.setFilter(valueFilter);

        ResultScanner resultScanner = myuser.getScanner(scan);

        for (Result result : resultScanner) {

            //获取rowkey

            System.out.println(Bytes.toString(result.getRow()));

            //指定列族以及列打印列当中的数据出来

            //    System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));

            System.out.println(Bytes.toString(result.getValue("f2".getBytes(), "phone".getBytes())));

        }

        myuser.close();

    }

 

 

 

 

 

 

 

 

 

 

 

2专用过滤器

1、单列值过滤器 SingleColumnValueFilter

SingleColumnValueFilter会返回满足条件的整行数据的所有字段

 

/**

     * 单列值过滤器,返回满足条件的整行数据

     */

     

    public void singleColumnFilter() throws IOException {

        //获取连接

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

        Scan scan = new Scan();

        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter("f1".getBytes(), "name".getBytes(), CompareFilter.CompareOp.EQUAL, "刘备".getBytes());

        scan.setFilter(singleColumnValueFilter);

        ResultScanner resultScanner = myuser.getScanner(scan);

        for (Result result : resultScanner) {

            //获取rowkey

            System.out.println(Bytes.toString(result.getRow()));

            //指定列族以及列打印列当中的数据出来

            System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));

            System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));

            System.out.println(Bytes.toString(result.getValue("f2".getBytes(), "phone".getBytes())));

        }

        myuser.close();

    }

 

 

2、列值排除过滤器SingleColumnValueExcludeFilter

与SingleColumnValueFilter相反,会排除掉指定的列,其他的列全部返回

 

3、rowkey前缀过滤器PrefixFilter

查询以00开头的所有前缀的rowkey

/**

     * 行键前缀过滤器

     */

     

    public void preFilter() throws IOException {

 

        //获取连接

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

        Scan scan = new Scan();

        PrefixFilter prefixFilter = new PrefixFilter("00".getBytes());

        scan.setFilter(prefixFilter);

        ResultScanner resultScanner = myuser.getScanner(scan);

        for (Result result : resultScanner) {

            //获取rowkey

            System.out.println(Bytes.toString(result.getRow()));

            //指定列族以及列打印列当中的数据出来

            System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));

            System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));

            System.out.println(Bytes.toString(result.getValue("f2".getBytes(), "phone".getBytes())));

        }

        myuser.close();

 

    }

 

 

4、分页过滤器PageFilter

通过pageFilter实现分页过滤器

 

    public void pageFilter2() throws IOException {

        //获取连接

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181,node03:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

        int pageNum = 3;

        int pageSize = 2;

        Scan scan = new Scan();

        if (pageNum == 1) {

            PageFilter filter = new PageFilter(pageSize);

            scan.setStartRow(Bytes.toBytes(""));

            scan.setFilter(filter);

            scan.setMaxResultSize(pageSize);

            ResultScanner scanner = myuser.getScanner(scan);

            for (Result result : scanner) {

                //获取rowkey

                System.out.println(Bytes.toString(result.getRow()));

                //指定列族以及列打印列当中的数据出来

//            System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));

                System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));

                //System.out.println(Bytes.toString(result.getValue("f2".getBytes(), "phone".getBytes())));

            }

 

        }else{

            String startRowKey ="";

            PageFilter filter = new PageFilter((pageNum - 1) * pageSize + 1  );

            scan.setStartRow(startRowKey.getBytes());

            scan.setMaxResultSize((pageNum - 1) * pageSize + 1);

            scan.setFilter(filter);

            ResultScanner scanner = myuser.getScanner(scan);

            for (Result result : scanner) {

                byte[] row = result.getRow();

                startRowKey =  new String(row);

            }

            Scan scan2 = new Scan();

            scan2.setStartRow(startRowKey.getBytes());

            scan2.setMaxResultSize(Long.valueOf(pageSize));

            PageFilter filter2 = new PageFilter(pageSize);

            scan2.setFilter(filter2);

 

            ResultScanner scanner1 = myuser.getScanner(scan2);

            for (Result result : scanner1) {

                byte[] row = result.getRow();

                System.out.println(new String(row));

            }

        }

        myuser.close();

    }

 

 

3、多过滤器综合查询FilterList

需求:使用SingleColumnValueFilter查询f1列族,name为刘备的数据,并且同时满足rowkey的前缀以00开头的数据(PrefixFilter)

 

 

/**

     * 多过滤器组合使用

     */

     

    public void manyFilter() throws IOException {

        //获取连接

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181,node03:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

        Scan scan = new Scan();

        FilterList filterList = new FilterList();

 

        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter("f1".getBytes(), "name".getBytes(), CompareFilter.CompareOp.EQUAL, "刘备".getBytes());

        PrefixFilter prefixFilter = new PrefixFilter("00".getBytes());

        filterList.addFilter(singleColumnValueFilter);

        filterList.addFilter(prefixFilter);

        scan.setFilter(filterList);

        ResultScanner scanner = myuser.getScanner(scan);

        for (Result result : scanner) {

            //获取rowkey

            System.out.println(Bytes.toString(result.getRow()));

            //指定列族以及列打印列当中的数据出来

//            System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));

            System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));

            //System.out.println(Bytes.toString(result.getValue("f2".getBytes(), "phone".getBytes())));

        }

        myuser.close();

 

    }

 

 

 

 

 

5、根据rowkey删除数据

/**

     * 删除数据

     */

     

    public  void  deleteByRowKey() throws IOException {

        //获取连接

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        Table myuser = connection.getTable(TableName.valueOf("myuser"));

        Delete delete = new Delete("0001".getBytes());

        myuser.delete(delete);

        myuser.close();

    }

 

 

 

 

6、删除表操作

 

    public void  deleteTable() throws IOException {

        //获取连接

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");

        Connection connection = ConnectionFactory.createConnection(conf);

        Admin admin = connection.getAdmin();

        admin.disableTable(TableName.valueOf("myuser"));

        admin.deleteTable(TableName.valueOf("myuser"));

        admin.close();

    }

 

14、HBase与MapReduce的集成

HBase当中的数据最终都是存储在HDFS上面的,HBase天生的支持MR的操作,我们可以通过MR直接处理HBase当中的数据,并且MR可以将处理后的结果直接存储到HBase当中去

 

需求:读取HBase当中一张表的数据,然后将数据写入到HBase当中的另外一张表当中去。注意:我们可以使用TableMapper与TableReducer来实现从HBase当中读取与写入数据

 

这里我们将myuser这张表当中f1列族的name和age字段写入到myuser2这张表的f1列族当中去

 

http://archive.cloudera.com/cdh5/cdh/5/hbase-1.2.0-cdh5.14.0/book.html#mapreduce

 

需求一:读取myuser这张表当中的数据写入到HBase的另外一张表当中去

第一步:创建myuser2这张表

注意:列族的名字要与myuser表的列族名字相同

hbase(main):010:0> create 'myuser2','f1'

第二步创建maven工程,导入jar包

<!-- Cloudera repository: hosts the CDH-flavoured Hadoop/HBase artifacts used below. -->
<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
</repositories>

<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.0-mr1-cdh5.14.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>1.2.0-cdh5.14.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>1.2.0-cdh5.14.0</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.testng</groupId>
        <artifactId>testng</artifactId>
        <version>6.14.3</version>
        <scope>test</scope>
    </dependency>
</dependencies>

<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.0</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
                <encoding>UTF-8</encoding>
                <!--    <verbal>true</verbal>-->
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>2.2</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <filters>
                            <filter>
                                <artifact>*:*</artifact>
                                <!-- Strip jar signature files from every shaded artifact. -->
                                <excludes>
                                    <exclude>META-INF/*.SF</exclude>
                                    <exclude>META-INF/*.DSA</exclude>
                                    <exclude>META-INF/*.RSA</exclude>
                                </excludes>
                            </filter>
                        </filters>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>

 

第三步:开发MR的程序

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

 

public class HBaseMR extends Configured implements Tool{

 

 

    public static class HBaseMapper extends  TableMapper<Text,Put>{

        /**

         *

         * @param key  我们的主键rowkey

         * @param value  我们一行数据所有列的值都封装在value里面了

         * @param context

         * @throws IOException

         * @throws InterruptedException

         */

        @Override

        protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {

            byte[] bytes = key.get();

            String rowKey = Bytes.toString(bytes);

            Put put = new Put(key.get());

            Cell[] cells = value.rawCells();

            for (Cell cell : cells) {

                if("f1".equals(Bytes.toString(CellUtil.cloneFamily(cell)))){

                    if("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))){

                        put.add(cell);

                    }

                    if("age".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))){

                        put.add(cell);

                    }

                }

            }

           if(!put.isEmpty()){

                context.write(new Text(rowKey),put);

            }

        }

    }

    public static class HBaseReducer extends TableReducer<Text,Put,ImmutableBytesWritable>{

        @Override

        protected void reduce(Text key, Iterable<Put> values, Context context) throws IOException, InterruptedException {

            for (Put value : values) {

                context.write(null,value);

            }

        }

    }

    @Override

    public int run(String[] args) throws Exception {

        Job job = Job.getInstance(super.getConf(), "hbaseMr");

        job.setJarByClass(this.getClass());

        Scan scan = new Scan();

        scan.setCaching(500);

        scan.setCacheBlocks(false);

        //使用TableMapReduceUtil 工具类来初始化我们的mapper

        TableMapReduceUtil.initTableMapperJob(TableName.valueOf("myuser"),scan,HBaseMapper.class,Text.class,Put.class,job);

        //使用TableMapReduceUtil 工具类来初始化我们的reducer

        TableMapReduceUtil.initTableReducerJob("myuser2",HBaseReducer.class,job);

 

        job.setNumReduceTasks(1);

 

        boolean b = job.waitForCompletion(true);

        return b?0:1;

    }

 

    public static void main(String[] args) throws Exception {

        //创建HBaseConfiguration配置

        Configuration conf =new Configuration();

        int run = ToolRunner.run(configuration, new HBaseMR(), args);

        System.exit(run);

 

    }

 

}

 

 

第四步:打包运行

注意,我们需要使用打包插件,将HBase的依赖jar包都打入到工程jar包里面去

然后执行

yarn jar hbaseStudy-1.0-SNAPSHOT.jar  cn.itcast.hbasemr.HBaseMR

 

 

或者我们也可以自己设置我们的环境变量

 

export HADOOP_HOME=/export/servers/hadoop-2.6.0-cdh5.14.0/

export HBASE_HOME=/export/servers/hbase-1.2.0-cdh5.14.0/

export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`

yarn jar original-hbaseStudy-1.0-SNAPSHOT.jar  cn.itcast.hbasemr.HBaseMR

需求二:读取HDFS文件,写入到HBase表当中去

读取hdfs路径/hbase/input/user.txt内容如下

0007    zhangsan        18

0008    lisi    25

0009    wangwu  20

 

第一步准备数据文件

准备数据文件,并将数据文件上传到HDFS上面去

hdfs dfs -mkdir -p /hbase/input

cd /export/servers/

vim user.txt

 

0007    zhangsan        18

0008    lisi    25

0009    wangwu  20

第二步:开发MR程序

 

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

 

public class Hdfs2Hbase extends Configured implements Tool{

    @Override

    public int run(String[] args) throws Exception {

        Job job = Job.getInstance(super.getConf(), "hdfs2Hbase");

        job.setJarByClass(Hdfs2Hbase.class);

        job.setInputFormatClass(TextInputFormat.class);

        TextInputFormat.addInputPath(job,new Path("hdfs://node01:8020/hbase/input"));

        job.setMapperClass(HdfsMapper.class);

        job.setMapOutputKeyClass(Text.class);

        job.setMapOutputValueClass(NullWritable.class);

 

        TableMapReduceUtil.initTableReducerJob("myuser2",HBaseReducer.class,job);

        job.setNumReduceTasks(1);

        boolean b = job.waitForCompletion(true);

 

        return b?0:1;

    }

 

 

    public static void main(String[] args) throws Exception {

        Configuration conf =new Configuration();

        int run = ToolRunner.run(configuration, new Hdfs2Hbase(), args);

        System.exit(run);

    }

 

 

    public static class HdfsMapper extends Mapper<LongWritable,Text,Text,NullWritable>{

        @Override

        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            context.write(value,NullWritable.get());

        }

    }

 

    public static class HBaseReducer extends TableReducer<Text,NullWritable,ImmutableBytesWritable>{

 

        @Override

        protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {

            String[] split = key.toString().split("\t");

            Put put = new Put(Bytes.toBytes(split[0]));

            put.addColumn("f1".getBytes(),"name".getBytes(),split[1].getBytes());

            put.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(Integer.parseInt(split[2])));

            context.write(new ImmutableBytesWritable(Bytes.toBytes(split[0])),put);

        }

    }

 

}

 

 

需求三作业,读取HBase的表数据,然后将数据写入到hdfs上面

 

 

需求四:通过bulkload的方式批量加载数据到HBase当中去

加载数据到HBase当中去的方式多种多样,我们可以使用HBase的javaAPI或者使用sqoop将我们的数据写入或者导入到HBase当中去,但是这些方式不是慢就是在导入的过程中占用Region资源导致效率低下,我们也可以通过MR的程序,将我们的数据直接转换成HBase的最终存储格式HFile,然后直接load数据到HBase当中去即可

 

HBase中每张Table在根目录(/HBase)下用一个文件夹存储,Table名为文件夹名,在Table文件夹下每个Region同样用一个文件夹存储,每个Region文件夹下的每个列族也用文件夹存储,而每个列族下存储的就是一些HFile文件,HFile就是HBase数据在HDFS下存储格式,所以HBase存储文件最终在hdfs上面的表现形式就是HFile,如果我们可以直接将数据转换为HFile的格式,那么我们的HBase就可以直接读取加载HFile格式的文件,就可以直接读取了

优点:

 

1.导入过程不占用Region资源

 

2.能快速导入海量的数据

 

3.节省内存

 

 

HBase数据正常读写流程

 

 

使用bulkload的方式将我们的数据直接生成HFile格式,然后直接加载到HBase的表当中去

 

 

需求:将我们hdfs上面的这个路径/hbase/input/user.txt的数据文件,转换成HFile格式,然后load到myuser2这张表里面去

 

第一步:定义我们的mapper类

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

 

public class LoadMapper  extends Mapper<LongWritable,Text,ImmutableBytesWritable,Put>{

    @Override

    protected void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException {

        String[] split = value.toString().split("\t");

        Put put = new Put(Bytes.toBytes(split[0]));

        put.addColumn("f1".getBytes(),"name".getBytes(),split[1].getBytes());

        put.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(Integer.parseInt(split[2])));

        context.write(new ImmutableBytesWritable(Bytes.toBytes(split[0])),put);

    }

}

 

 

 

第二步开发我们的main程序入口类

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

 

public class HBaseLoad  extends Configured implements Tool {

    @Override

    public int run(String[] args) throws Exception {

        final String INPUT_PATH= "hdfs://node01:8020/hbase/input";

        final String OUTPUT_PATH= "hdfs://node01:8020/hbase/output_hfile";

        Configuration conf = HBaseConfiguration.create();

        Connection connection = ConnectionFactory.createConnection(conf);

        Table table = connection.getTable(TableName.valueOf("myuser2"));

        Job job= Job.getInstance(conf);

        job.setJarByClass(HBaseLoad.class);

        job.setMapperClass(LoadMapper.class);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);

        job.setMapOutputValueClass(Put.class);

        job.setOutputFormatClass(HFileOutputFormat2.class);

        HFileOutputFormat2.configureIncrementalLoad(job,table,connection.getRegionLocator(TableName.valueOf("myuser2")));

        FileInputFormat.addInputPath(job,new Path(INPUT_PATH));

        FileOutputFormat.setOutputPath(job,new Path(OUTPUT_PATH));

        boolean b = job.waitForCompletion(true);

        return b?0:1;

    }

 

    public static void main(String[] args) throws Exception {

        Configuration conf =new Configuration();

        int run = ToolRunner.run(configuration, new HBaseLoad(), args);

        System.exit(run);

    }

}

 

 

第三步代码打成jar包然后进行运行

yarn jar original-hbaseStudy-1.0-SNAPSHOT.jar  cn.itcast.hbasemr.HBaseLoad

 

第四步:开发代码,加载数据

将我们的输出路径下面的HFile文件,加载到我们的hbase表当中去

 

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

 

public class LoadData {

    public static void main(String[] args) throws Exception {

        Configuration conf =new Configuration();

        conf.set("hbase.zookeeper.property.clientPort", "2181");

        configuration.set("hbase.zookeeper.quorum", "node01,node02,node03");

 

        Connection connection =  ConnectionFactory.createConnection(conf);

        Admin admin = connection.getAdmin();

        Table table = connection.getTable(TableName.valueOf("myuser2"));

        LoadIncrementalHFiles load = new LoadIncrementalHFiles(configuration);

        load.doBulkLoad(new Path("hdfs://node01:8020/hbase/output_hfile"), admin,table,connection.getRegionLocator(TableName.valueOf("myuser2")));

    }

 

}

或者我们也可以通过命令行来进行加载数据

先将hbase的jar包添加到hadoop的classpath路径下

export HBASE_HOME=/export/servers/hbase-1.2.0-cdh5.14.0/

export HADOOP_HOME=/export/servers/hadoop-2.6.0-cdh5.14.0/

export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`

然后执行以下命令,将hbase的HFile直接导入到表myuser2当中来

 

yarn jar /export/servers/hbase-1.2.0-cdh5.14.0/lib/hbase-server-1.2.0-cdh5.14.0.jar completebulkload /hbase/output_hfile myuser2

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值