HBase (4) - HBase API Operations

1 HBase API Operations

1.1 Environment Setup

After creating a new project, add the following dependencies to pom.xml:

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.4.13</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.4.13</version>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
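
Besides the dependencies, the client must be able to locate the cluster: HBaseConfiguration.create() loads hbase-site.xml from the classpath, so copy the cluster's hbase-site.xml into src/main/resources. A minimal sketch (the ZooKeeper quorum hosts below are assumptions; use your own cluster's hostnames):

<configuration>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>hadoop101,hadoop102,hadoop103</value>
    </property>
</configuration>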

1.2 Writing the HBase API Code

package com.jackyan.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class TestHbaseApi {
    Connection connection = null;
    Configuration configuration = null;
    String tableName = "jackyan:emp";

    /**
     * Get a connection
     * @throws IOException
     */
    @Before
    public void getConnect() throws IOException {
        configuration = HBaseConfiguration.create();
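        // HBaseConfiguration.create() reads hbase-site.xml from the classpath.
        // The ZooKeeper quorum can also be set explicitly; the hostnames here
        // are illustrative assumptions:
        // configuration.set("hbase.zookeeper.quorum", "hadoop101,hadoop102,hadoop103");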
        connection = ConnectionFactory.createConnection(configuration);
        System.out.println(connection);
    }

    /**
     * Create a table.
     * If the namespace does not exist, create it first.
     */
    @Test
    public void createTable() throws IOException {

        HBaseAdmin admin = (HBaseAdmin) connection.getAdmin();
        boolean b = admin.tableExists(tableName);

        if (b) {
            System.out.println("Table already exists!");
        } else {
            // The table does not exist, so create it
            TableName table = TableName.valueOf(tableName);
            HTableDescriptor hTableDescriptor = new HTableDescriptor(table);
            HColumnDescriptor family = new HColumnDescriptor("basic");
            hTableDescriptor.addFamily(family);
            family = new HColumnDescriptor("info");
            hTableDescriptor.addFamily(family);

            try {
                admin.createTable(hTableDescriptor);
            } catch (NamespaceNotFoundException e) {
                // The namespace does not exist, so create it first
                System.out.println("Creating namespace...");
                NamespaceDescriptor namespaceDescriptor = NamespaceDescriptor.create("jackyan").build();
                admin.createNamespace(namespaceDescriptor);

                // Then create the table
                admin.createTable(hTableDescriptor);
            }
            System.out.println("Table created successfully!");
        }
        admin.close();
    }

    /**
     * Delete a table
     * @throws IOException
     */
    @Test
    public void deleteTable() throws IOException {
        // Check whether the table exists
        HBaseAdmin admin = (HBaseAdmin) connection.getAdmin();
        // If the table exists, delete it
        if (admin.tableExists(tableName)) {
            // A table must be disabled before it can be deleted
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
            System.out.println("Table " + tableName + " deleted successfully!");
        } else {
            System.out.println("Table " + tableName + " does not exist!");
        }
        }

        admin.close();
    }

    /**
     * Insert data
     */
    @Test
    public void addRowData() throws IOException {
        // Get the table object
        Table table = connection.getTable(TableName.valueOf(tableName));

        // Row 100001: a single Put can carry several columns
        Put put = new Put(Bytes.toBytes("100001"));
        put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("name"), Bytes.toBytes("tom"));
        put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("age"), Bytes.toBytes("28"));
        put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("sex"), Bytes.toBytes("male"));
        table.put(put);

        // Row 100002: columns from both the basic and info families
        put = new Put(Bytes.toBytes("100002"));
        put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("name"), Bytes.toBytes("jack"));
        put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("age"), Bytes.toBytes("26"));
        put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("sex"), Bytes.toBytes("male"));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("email"), Bytes.toBytes("12345678@qq.com"));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("phone"), Bytes.toBytes("12345678"));
        // table.put(List<Put>) can also batch multiple rows in a single call
        table.put(put);

        table.close();
        System.out.println("Data inserted successfully!");
    }

    /**
     * Scan all rows
     */
    @Test
    public void getAllRowData() throws IOException {
        // Create a Scan object; by default it scans the entire table
        Scan scan = new Scan();
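        // The scan can optionally be limited to a rowkey range instead of the
        // whole table (HBase 1.x client API):
        // scan.setStartRow(Bytes.toBytes("100001"));
        // scan.setStopRow(Bytes.toBytes("100003"));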

        // Get the table object
        Table table = connection.getTable(TableName.valueOf(tableName));

        // Get a scanner over the query results
        ResultScanner resultScanner = table.getScanner(scan);

        for (Result result : resultScanner) {
            // Read the raw cells of each row
            Cell[] cells = result.rawCells();
            for (Cell cell : cells) {
                System.out.println("rowkey:" + Bytes.toString(CellUtil.cloneRow(cell)));
                System.out.println("family:" + Bytes.toString(CellUtil.cloneFamily(cell)));
                System.out.println("column:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                System.out.println("value:" + Bytes.toString(CellUtil.cloneValue(cell)));
            }
        }
        table.close();
    }

    /**
     * Get a single row
     */
    @Test
    public void getOneRowData() throws IOException {

        String rowkey = "100002";
        Get get = new Get(Bytes.toBytes(rowkey));

        // Get the table object
        Table table = connection.getTable(TableName.valueOf(tableName));

        // Get the query result
        Result result = table.get(get);

        // Read the raw cells of the row
        Cell[] cells = result.rawCells();
        for (Cell cell : cells) {
            System.out.println("rowkey:" + Bytes.toString(CellUtil.cloneRow(cell)));
            System.out.println("family:" + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println("column:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println("value:" + Bytes.toString(CellUtil.cloneValue(cell)));
        }
        table.close();
    }

    /**
     * Get the data for a specific column family and column
     */
    @Test
    public void getRowQualifier() throws IOException {

        String rowkey = "100001";
        Get get = new Get(Bytes.toBytes(rowkey));
        String family = "basic";
        String qualifier = "name";
        get.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));

        // Get the table object
        Table table = connection.getTable(TableName.valueOf(tableName));

        // Get the query result
        Result result = table.get(get);

        // Read the raw cells of the row
        Cell[] cells = result.rawCells();
        for (Cell cell : cells) {
            System.out.println("rowkey:" + Bytes.toString(CellUtil.cloneRow(cell)));
            System.out.println("family:" + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println("column:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println("value:" + Bytes.toString(CellUtil.cloneValue(cell)));
        }
        table.close();
    }

    /**
     * Delete multiple rows
     */
    @Test
    public void deleteRowData() throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));

        List<Delete> deleteList = new ArrayList<Delete>();
        String rowkey = "100001";
        Delete delete = new Delete(Bytes.toBytes(rowkey));
        deleteList.add(delete);

        rowkey = "100002";
        delete = new Delete(Bytes.toBytes(rowkey));
        deleteList.add(delete);
        table.delete(deleteList);
        table.close();
        System.out.println("Data deleted successfully!");
    }

    /**
     * Close the connection
     */
    @After
    public void close() throws IOException {
        if (connection != null) {
            connection.close();
        }
        System.out.println("Closing resources...");
    }
}

1.3 HbaseUtil

package com.jackyan.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.Map;

/**
 * HBase utility class
 */
public class HbaseUtil {

    // Use a ThreadLocal so that each thread gets its own Connection (thread safety)
    private static ThreadLocal<Connection> connThreadLocal = new ThreadLocal<Connection>();

    /**
     * Get (or lazily create) the connection bound to the current thread
     */
    public static Connection getConnection() {
        // Try to reuse a connection already cached in the ThreadLocal
        Connection conn = connThreadLocal.get();

        // If there is none, create one and cache it in the ThreadLocal
        if (conn == null) {
            Configuration conf = HBaseConfiguration.create();
            try {
                conn = ConnectionFactory.createConnection(conf);
                connThreadLocal.set(conn);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return conn;
    }

    /**
     * Insert a single column value
     */
    public static void put(String tablename, String rowkey, String familyname, String column, String value) throws Exception {
        // Get (or create) the connection for this thread
        Connection conn = getConnection();
        // Get the table
        Table table = conn.getTable(TableName.valueOf(tablename));
        // Convert the strings to byte[] and build the Put
        Put put = new Put(Bytes.toBytes(rowkey));
        put.addColumn(Bytes.toBytes(familyname), Bytes.toBytes(column), Bytes.toBytes(value));
        table.put(put);
        table.close();
    }

    /**
     * Insert multiple column values
     * @param tablename
     * @param rowkey
     * @param familyname
     * @param datamap column/value pairs
     * @throws Exception
     */
    public static void put(String tablename, String rowkey, String familyname, Map<String, String> datamap) throws Exception {
        // Get (or create) the connection for this thread
        Connection conn = getConnection();
        // Get the table
        Table table = conn.getTable(TableName.valueOf(tablename));
        Put put = new Put(Bytes.toBytes(rowkey));
        if (datamap != null) {
            for (Map.Entry<String, String> entry : datamap.entrySet()) {
                put.addColumn(Bytes.toBytes(familyname), Bytes.toBytes(entry.getKey()), Bytes.toBytes(entry.getValue()));
            }
        }
        table.put(put);
        table.close();
    }

    /**
     * Read a single cell value
     */
    public static String getdata(String tablename, String rowkey, String familyname, String column) throws Exception {
        // Get (or create) the connection for this thread
        Connection conn = getConnection();
        // Get the table
        Table table = conn.getTable(TableName.valueOf(tablename));
        Get get = new Get(Bytes.toBytes(rowkey));
        Result result = table.get(get);
        byte[] resultbytes = result.getValue(Bytes.toBytes(familyname), Bytes.toBytes(column));
        table.close();
        if (resultbytes == null) {
            return null;
        }
        return Bytes.toString(resultbytes);
    }

    /**
     * Insert data (single column; equivalent to the first put above)
     */
    public static void putdata(String tablename, String rowkey, String familyname, String column, String data) throws Exception {
        // Get (or create) the connection for this thread
        Connection conn = getConnection();
        // Get the table
        Table table = conn.getTable(TableName.valueOf(tablename));
        Put put = new Put(Bytes.toBytes(rowkey));
        put.addColumn(Bytes.toBytes(familyname), Bytes.toBytes(column), Bytes.toBytes(data));
        table.put(put);
        table.close();
    }

    /**
     * Close the connection and remove it from the ThreadLocal
     */
    public static void close() {
        Connection conn = connThreadLocal.get();
        if (conn != null) {
            try {
                conn.close();
                connThreadLocal.remove();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
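
A minimal usage sketch for HbaseUtil (the jackyan:emp table from section 1.2 is assumed to exist; the rowkey and values are made up for illustration):

public class HbaseUtilDemo {

    public static void main(String[] args) throws Exception {
        // Write a single cell (the connection is created lazily on first use)
        HbaseUtil.put("jackyan:emp", "100003", "basic", "name", "lily");

        // Read it back
        System.out.println(HbaseUtil.getdata("jackyan:emp", "100003", "basic", "name"));

        // Release the connection bound to this thread
        HbaseUtil.close();
    }
}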

1.4 MapReduce

Through HBase's Java API we can run MapReduce jobs against HBase, for example using MapReduce to import data from the local file system into an HBase table, or reading raw data out of HBase and then analyzing it with MapReduce.

1.4.1 Official HBase MapReduce Jobs

1. View the classpath entries required by HBase MapReduce jobs:

$ bin/hbase mapredcp

2. Import the environment variables
(1) Temporarily (effective for the current shell session only; run the following on the command line):

$ export HBASE_HOME=/opt/module/hbase
$ export HADOOP_HOME=/opt/module/hadoop
$ export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`

(2) Permanently: configure in /etc/profile:

export HBASE_HOME=/opt/module/hbase
export HADOOP_HOME=/opt/module/hadoop

And configure the following in hadoop-env.sh (note: add it after the for loop):

export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase/lib/*

3. Run an official MapReduce job
-- Case 1: count how many rows the student table has

$ yarn jar lib/hbase-server-1.4.13.jar rowcounter student
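
-- Case 2 (a sketch; the input path and column mapping are assumptions, not from the original setup): the official importtsv tool bulk-loads tab-separated files from HDFS into an existing table

$ yarn jar lib/hbase-server-1.4.13.jar importtsv \
-Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:age \
student hdfs://hadoop101:9000/input/student.tsv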

1.4.2 Migrating Data from One HBase Table to Another

1. Write the code
Table2TableApplication

import org.apache.hadoop.util.ToolRunner;

public class Table2TableApplication {

    public static void main(String[] args) throws Exception {

        // ToolRunner runs the MapReduce tool
        ToolRunner.run(new Table2TableMapReduceTool(), args);
    }
}

Table2TableMapReduceTool

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.util.Tool;

public class Table2TableMapReduceTool implements Tool {

    private Configuration conf;

    public int run(String[] args) throws Exception {

        // Create the job
        Job job = Job.getInstance(getConf());
        job.setJarByClass(Table2TableMapReduceTool.class);

        // mapper: read every row of the source table
        TableMapReduceUtil.initTableMapperJob(
                "jackyan:emp",
                new Scan(),
                ReadDataFromTableMapper.class,
                ImmutableBytesWritable.class,
                Put.class,
                job
        );

        // reducer: write the rows into the target table
        TableMapReduceUtil.initTableReducerJob(
                "emp",
                WriteDataToTableMRReducer.class,
                job
        );

        boolean b = job.waitForCompletion(true);
        return b ? JobStatus.State.SUCCEEDED.getValue() : JobStatus.State.FAILED.getValue();
    }

    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    public Configuration getConf() {
        return conf;
    }
}
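
Note that initTableReducerJob writes into an existing table; it does not create one. Before running the job, create the target table with a matching column family in the hbase shell, e.g. (a sketch; the family name matches what the mapper below copies):

create 'emp', 'basic'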

ReadDataFromTableMapper

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

public class ReadDataFromTableMapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {

        // Copy the selected cells of each row into a Put object
        Put put = new Put(key.get());

        // Iterate over the cells; each cell is one column value
        Cell[] cells = value.rawCells();

        // Copy part of the data: only name, age and sex from the basic family
        for (Cell cell : cells) {
            if ("basic".equals(Bytes.toString(CellUtil.cloneFamily(cell)))) {
                String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                if ("name".equals(qualifier) || "age".equals(qualifier) || "sex".equals(qualifier)) {
                    // Add the cell to the Put object
                    put.add(cell);
                }
            }
        }

        // To copy all of the data instead (requires both tables to have the same structure):
/*        for (Cell cell : cells) {
            put.addColumn(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell), CellUtil.cloneValue(cell));
        }*/

        // Emit the row as the map output; skip rows that matched no cells,
        // since the output format rejects empty Puts
        if (!put.isEmpty()) {
            context.write(key, put);
        }
    }
}

WriteDataToTableMRReducer

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

public class WriteDataToTableMRReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {

        // Forward each Put to the output table unchanged
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}

2. Package the code
This must be built as a runnable jar:
Select the project or module to package --> Project Structure --> Artifacts --> '+' --> JAR --> From modules with dependencies...

Choose the module to package, pick the class containing the main function, select "copy to the output directory...", and finally choose a directory for the packaging metadata (the resources directory is a common choice).

Then choose Build --> Build Artifacts...

After the build succeeds, the jar is in the out directory under the project root.

3. Upload the packaged jar to the Linux environment and run it
Run the command (HADOOP_CLASSPATH must include the HBase jars, as configured in section 1.4.1):

yarn jar jar/hbase_jar/hbase.jar

1.4.3 Loading Data from an HDFS File into an HBase Table

Write the code
File2TableApplication

import org.apache.hadoop.util.ToolRunner;

public class File2TableApplication {

    public static void main(String[] args) throws Exception {

        // ToolRunner runs the MapReduce tool
        ToolRunner.run(new File2TableMapReduceTool(), args);
    }
}

File2TableMapReduceTool

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;

public class File2TableMapReduceTool implements Tool {

    private Configuration conf;

    public int run(String[] args) throws Exception {

        // Create the job
        Job job = Job.getInstance(getConf());
        job.setJarByClass(File2TableMapReduceTool.class);

        // Input file on HDFS (hardcoded here for simplicity)
        Path path = new Path("hdfs://hadoop101:9000/user.csv");

        // Set the input path for FileInputFormat
        FileInputFormat.addInputPath(job, path);
//        FileInputFormat.setInputPaths(job, path);

        // mapper: parse each line of the file into a Put
        job.setMapperClass(ReadDataFromFileMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);

        // reducer: write the Puts into the target table
        TableMapReduceUtil.initTableReducerJob(
                "user",
                WriteDataToTableMRReducer.class,
                job
        );

        boolean b = job.waitForCompletion(true);
        return b ? JobStatus.State.SUCCEEDED.getValue() : JobStatus.State.FAILED.getValue();
    }

    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    public Configuration getConf() {
        return conf;
    }
}
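
As in the previous example, the target table must exist before the job runs; a sketch for the hbase shell (the family name matches the mapper below):

create 'user', 'info'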

ReadDataFromFileMapper

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ReadDataFromFileMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // Each line holds rowkey, name and age separated by commas
        String[] fields = value.toString().split(",");

        String rowkey = fields[0];
        String name = fields[1];
        String age = fields[2];

        // Build the rowkey writable used as the map output key
        ImmutableBytesWritable rowkeyWritable = new ImmutableBytesWritable(Bytes.toBytes(rowkey));

        // Create the Put object
        Put put = new Put(Bytes.toBytes(rowkey));

        // Add family:column:value pairs
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(name));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(age));

        context.write(rowkeyWritable, put);
    }
}
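
The mapper assumes each line of user.csv is rowkey,name,age. A made-up sample input for illustration:

100001,tom,28
100002,jack,26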

WriteDataToTableMRReducer

The reducer is the same WriteDataToTableMRReducer shown in the previous example; it forwards each Put to the output table unchanged.

Next, build the runnable jar and run it; the packaging steps and run method are the same as in the previous example.
