1 HBase API操作
1.1 环境准备
新建项目后在pom.xml中添加依赖:
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.4.13</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.4.13</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
1.2 HBaseAPI代码编写
package com.jackyan.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class TestHbaseApi {

    Connection connection = null;
    Configuration configuration = null;
    String tableName = "jackyan:emp";

    /**
     * Opens the HBase connection before each test case.
     *
     * @throws IOException if the connection cannot be created
     */
    @Before
    public void getConnect() throws IOException {
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        System.out.println(connection);
    }

    /**
     * Creates the table with the "basic" and "info" column families.
     * If the namespace does not exist yet, it is created first and the
     * table creation is retried.
     *
     * @throws IOException on admin failure
     */
    @Test
    public void createTable() throws IOException {
        // Use the Admin interface instead of the deprecated HBaseAdmin cast.
        Admin admin = connection.getAdmin();
        try {
            TableName table = TableName.valueOf(tableName);
            if (admin.tableExists(table)) {
                System.out.println("表已存在!!!");
            } else {
                HTableDescriptor hTableDescriptor = new HTableDescriptor(table);
                hTableDescriptor.addFamily(new HColumnDescriptor("basic"));
                hTableDescriptor.addFamily(new HColumnDescriptor("info"));
                try {
                    admin.createTable(hTableDescriptor);
                } catch (NamespaceNotFoundException e) {
                    // Namespace is missing: create it, then retry the table.
                    System.out.println("创建命名空间。。。");
                    NamespaceDescriptor namespaceDescriptor = NamespaceDescriptor.create("jackyan").build();
                    admin.createNamespace(namespaceDescriptor);
                    admin.createTable(hTableDescriptor);
                }
                System.out.println("表创建成功!!!");
            }
        } finally {
            // Close the admin even when an exception escapes the body above.
            admin.close();
        }
    }

    /**
     * Deletes the table if it exists (disable first, as HBase requires).
     *
     * @throws IOException on admin failure
     */
    @Test
    public void deleteTable() throws IOException {
        Admin admin = connection.getAdmin();
        try {
            TableName table = TableName.valueOf(tableName);
            if (admin.tableExists(table)) {
                // A table must be disabled before it can be deleted.
                admin.disableTable(table);
                admin.deleteTable(table);
                System.out.println("表 " + tableName + " 删除成功!");
            } else {
                System.out.println("表 " + tableName + " 不存在!");
            }
        } finally {
            admin.close();
        }
    }

    /**
     * Inserts two sample rows (rowkeys 100001 and 100002).
     * Fixes the original code, which called table.close() twice.
     *
     * @throws IOException on put failure
     */
    @Test
    public void addRowData() throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        try {
            Put put = new Put(Bytes.toBytes("100001"));
            put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("name"), Bytes.toBytes("tom"));
            put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("age"), Bytes.toBytes("28"));
            put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("sex"), Bytes.toBytes("male"));
            table.put(put);

            put = new Put(Bytes.toBytes("100002"));
            put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("name"), Bytes.toBytes("jack"));
            put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("age"), Bytes.toBytes("26"));
            put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("sex"), Bytes.toBytes("male"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("email"), Bytes.toBytes("12345678@qq.com"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("phone"), Bytes.toBytes("12345678"));
            table.put(put);
        } finally {
            table.close();
        }
        System.out.println("插入数据成功!");
    }

    /**
     * Prints rowkey/family/qualifier/value for every cell of one result row.
     */
    private void printCells(Result result) {
        for (Cell cell : result.rawCells()) {
            System.out.println("rowkey:" + Bytes.toString(CellUtil.cloneRow(cell)));
            System.out.println("family:" + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println("column:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println("value:" + Bytes.toString(CellUtil.cloneValue(cell)));
        }
    }

    /**
     * Scans the whole table and prints every cell.
     *
     * @throws IOException on scan failure
     */
    @Test
    public void getAllRowData() throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        try {
            // The scanner holds server-side resources and must be closed too.
            ResultScanner resultScanner = table.getScanner(new Scan());
            try {
                for (Result result : resultScanner) {
                    printCells(result);
                }
            } finally {
                resultScanner.close();
            }
        } finally {
            table.close();
        }
    }

    /**
     * Fetches a single row by rowkey and prints its cells.
     *
     * @throws IOException on get failure
     */
    @Test
    public void getOneRowData() throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        try {
            Get get = new Get(Bytes.toBytes("100002"));
            printCells(table.get(get));
        } finally {
            table.close();
        }
    }

    /**
     * Fetches a single column (basic:name) of one row and prints it.
     *
     * @throws IOException on get failure
     */
    @Test
    public void getRowQualifier() throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        try {
            Get get = new Get(Bytes.toBytes("100001"));
            // Restrict the Get to one family:qualifier pair.
            get.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("name"));
            printCells(table.get(get));
        } finally {
            table.close();
        }
    }

    /**
     * Deletes the two sample rows in one batched call.
     *
     * @throws IOException on delete failure
     */
    @Test
    public void deleteRowData() throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        try {
            List<Delete> deleteList = new ArrayList<Delete>();
            deleteList.add(new Delete(Bytes.toBytes("100001")));
            deleteList.add(new Delete(Bytes.toBytes("100002")));
            table.delete(deleteList);
        } finally {
            table.close();
        }
        System.out.println("删除数据成功!");
    }

    /**
     * Closes the connection after each test case.
     *
     * @throws IOException if close fails
     */
    @After
    public void close() throws IOException {
        if (connection != null) {
            connection.close();
        }
        System.out.println("关闭资源。。。");
    }
}
1.3 HbaseUtil
package com.jackyan.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
/**
* Hbase工具类
*/
public class HbaseUtil {

    // One connection per thread; an HBase Connection is heavyweight, so it
    // is cached in a ThreadLocal for thread safety.
    private static ThreadLocal<Connection> connThreadLocal = new ThreadLocal<Connection>();

    /**
     * Returns this thread's connection, creating and caching it on first use.
     * Centralizing this here fixes the original NPE risk: data methods used
     * the ThreadLocal directly and crashed if getConnection() was never called.
     *
     * @return a live, thread-local Connection
     * @throws IOException if the connection cannot be created
     */
    private static Connection ensureConnection() throws IOException {
        Connection conn = connThreadLocal.get();
        if (conn == null) {
            Configuration conf = HBaseConfiguration.create();
            conn = ConnectionFactory.createConnection(conf);
            connThreadLocal.set(conn);
        }
        return conn;
    }

    /**
     * Eagerly initializes the thread-local connection.
     * Kept for backward compatibility; data methods no longer depend on it.
     */
    public static void getConnection() {
        try {
            ensureConnection();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Inserts a single column value for one row.
     *
     * @param tablename  table to write to
     * @param rowkey     row key
     * @param famliyname column family
     * @param column     column qualifier
     * @param value      cell value
     * @throws Exception on connection or put failure
     */
    public static void put(String tablename, String rowkey, String famliyname, String column, String value) throws Exception {
        Connection conn = ensureConnection();
        Table table = conn.getTable(TableName.valueOf(tablename));
        try {
            Put put = new Put(Bytes.toBytes(rowkey));
            put.addColumn(Bytes.toBytes(famliyname), Bytes.toBytes(column), Bytes.toBytes(value));
            table.put(put);
        } finally {
            table.close();
        }
        System.out.println("ok");
    }

    /**
     * Inserts several columns of one family for one row.
     *
     * @param tablename  table to write to
     * @param rowkey     row key
     * @param famliyname column family
     * @param datamap    qualifier -> value pairs (may be null for no columns)
     * @throws Exception on connection or put failure
     */
    public static void put(String tablename, String rowkey, String famliyname, Map<String,String> datamap) throws Exception {
        Connection conn = ensureConnection();
        Table table = conn.getTable(TableName.valueOf(tablename));
        try {
            Put put = new Put(Bytes.toBytes(rowkey));
            if (datamap != null) {
                for (Map.Entry<String,String> entry : datamap.entrySet()) {
                    put.addColumn(Bytes.toBytes(famliyname), Bytes.toBytes(entry.getKey()), Bytes.toBytes(entry.getValue()));
                }
            }
            table.put(put);
        } finally {
            table.close();
        }
        System.out.println("ok");
    }

    /**
     * Reads one cell value.
     *
     * @param tablename  table to read from
     * @param rowkey     row key
     * @param famliyname column family
     * @param colum      column qualifier
     * @return the cell value as a UTF-8 string, or null if absent
     * @throws Exception on connection or get failure
     */
    public static String getdata(String tablename, String rowkey, String famliyname, String colum) throws Exception {
        Connection conn = ensureConnection();
        Table table = conn.getTable(TableName.valueOf(tablename));
        try {
            Get get = new Get(Bytes.toBytes(rowkey));
            Result result = table.get(get);
            // Bytes.toBytes/toString are UTF-8, unlike the platform-dependent
            // String.getBytes()/new String(byte[]) the original used.
            byte[] resultbytes = result.getValue(Bytes.toBytes(famliyname), Bytes.toBytes(colum));
            if (resultbytes == null) {
                return null;
            }
            return Bytes.toString(resultbytes);
        } finally {
            // The original leaked the table here.
            table.close();
        }
    }

    /**
     * Inserts one cell value (same contract as the 5-argument put()).
     *
     * @param tablename  table to write to
     * @param rowkey     row key
     * @param famliyname column family
     * @param colum      column qualifier
     * @param data       cell value
     * @throws Exception on connection or put failure
     */
    public static void putdata(String tablename, String rowkey, String famliyname, String colum, String data) throws Exception {
        Connection conn = ensureConnection();
        Table table = conn.getTable(TableName.valueOf(tablename));
        try {
            Put put = new Put(Bytes.toBytes(rowkey));
            put.addColumn(Bytes.toBytes(famliyname), Bytes.toBytes(colum), Bytes.toBytes(data));
            table.put(put);
        } finally {
            // The original leaked the table here.
            table.close();
        }
    }

    /**
     * Closes this thread's connection and clears the ThreadLocal slot.
     */
    public static void close() {
        Connection conn = connThreadLocal.get();
        if (conn != null) {
            try {
                conn.close();
                connThreadLocal.remove();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
1.4 MapReduce
通过HBase的相关JavaAPI,我们可以实现伴随HBase操作的MapReduce过程,比如使用MapReduce将数据从本地文件系统导入到HBase的表中,比如我们从HBase中读取一些原始数据后使用MapReduce做数据分析。
1.4.1 官方HBase-MapReduce
1.查看HBase的MapReduce任务的执行
$ bin/hbase mapredcp
2.环境变量的导入
(1)执行环境变量的导入(临时生效,在命令行执行下述操作)
$ export HBASE_HOME=/opt/module/hbase
$ export HADOOP_HOME=/opt/module/hadoop
$ export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
(2)永久生效:在/etc/profile配置
export HBASE_HOME=/opt/module/hbase
export HADOOP_HOME=/opt/module/hadoop
并在hadoop-env.sh中配置:(注意:在for循环之后配)
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase/lib/*
3.运行官方的MapReduce任务
-- 案例一:统计Student表中有多少行数据
$ yarn jar lib/hbase-server-1.4.13.jar rowcounter student
1.4.2 从hbase一个表将数据迁移到另一个表
1、代码编写
Table2TableApplication
public class Table2TableApplication {
    /**
     * Driver entry point: runs the table-to-table MapReduce job via
     * ToolRunner and propagates the job status as the process exit code
     * (the original discarded the return value, so failures exited with 0).
     */
    public static void main(String[] args) throws Exception {
        // ToolRunner parses generic Hadoop options and runs the Tool.
        int status = ToolRunner.run(new Table2TableMapReduceTool(), args);
        System.exit(status);
    }
}
Table2TableMapReduceTool
public class Table2TableMapReduceTool implements Tool {

    // Configuration injected by ToolRunner via setConf(); the original left
    // setConf() empty and returned null from getConf(), breaking the Tool
    // contract and losing any -D options passed on the command line.
    private Configuration conf;

    /**
     * Configures and runs a job that copies selected columns from
     * jackyan:emp into the emp table.
     *
     * @param args unused; generic options are handled by ToolRunner
     * @return SUCCEEDED/FAILED job-status value
     */
    public int run(String[] args) throws Exception {
        // Use the injected configuration so command-line settings apply.
        Job job = (getConf() == null) ? Job.getInstance() : Job.getInstance(getConf());
        job.setJarByClass(Table2TableMapReduceTool.class);
        // Mapper: read rows of the source table.
        TableMapReduceUtil.initTableMapperJob(
                "jackyan:emp",
                new Scan(),
                ReadDataFromTableMapper.class,
                ImmutableBytesWritable.class,
                Put.class,
                job
        );
        // Reducer: write the Puts into the target table.
        TableMapReduceUtil.initTableReducerJob(
                "emp",
                WriteDataToTableMRReducer.class,
                job
        );
        boolean b = job.waitForCompletion(true);
        return b ? JobStatus.State.SUCCEEDED.getValue() : JobStatus.State.FAILED.getValue();
    }

    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    public Configuration getConf() {
        return conf;
    }
}
ReadDataFromTableMapper
public class ReadDataFromTableMapper extends TableMapper<ImmutableBytesWritable, Put> {
    /**
     * Copies the basic:name, basic:age and basic:sex cells of each source
     * row into a Put keyed by the same rowkey.
     * Rows with none of those cells are skipped: the original wrote an
     * empty Put, which fails at write time with "No columns to insert".
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // One Put per source row, keyed by the original rowkey.
        Put put = new Put(key.get());
        // Each cell is one column of the row.
        for (Cell cell : value.rawCells()) {
            String family = Bytes.toString(CellUtil.cloneFamily(cell));
            String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            // Keep only the three basic-family columns we migrate.
            if ("basic".equals(family)
                    && ("name".equals(qualifier) || "age".equals(qualifier) || "sex".equals(qualifier))) {
                put.add(cell);
            }
        }
        // To copy ALL columns instead (requires identical table schemas):
        /* for (Cell cell : value.rawCells()) {
            put.addColumn(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell), CellUtil.cloneValue(cell));
        } */
        // Guard against rows that contributed no cells.
        if (!put.isEmpty()) {
            context.write(key, put);
        }
    }
}
WriteDataToTableMRReducer
public class WriteDataToTableMRReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    /**
     * Pass-through reducer: forwards every Put received for a rowkey
     * straight to the output table unchanged.
     */
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // The output key is ignored by the table output format.
        NullWritable outKey = NullWritable.get();
        for (Put rowPut : values) {
            context.write(outKey, rowPut);
        }
    }
}
2、打包
此处需要打成可运行jar包
选中需要打包的项目或模块-->Project Structure-->Artifacts-->'+'-->JAR-->From modules with dependencies...
选择需要打包的模块,选择main函数所在的主类,选择copy to the output...,最后选择打包配置信息存放的目录,一般选择resources目录
然后选择Build-->Build Artifacts...
最后build,build成功之后的jar包在主项目根目录下的out目录下
3、将打包后的jar目录上传到linux环境运行
运行命令
yarn jar jar/hbase_jar/hbase.jar
1.4.3 从hdfs上的文件将数据插入到hbase表中
代码编写
File2TableApplication
public class File2TableApplication {
    /**
     * Driver entry point: runs the file-to-table MapReduce job via
     * ToolRunner and propagates the job status as the process exit code
     * (the original discarded the return value, so failures exited with 0).
     */
    public static void main(String[] args) throws Exception {
        // ToolRunner parses generic Hadoop options and runs the Tool.
        int status = ToolRunner.run(new File2TableMapReduceTool(), args);
        System.exit(status);
    }
}
File2TableMapReduceTool
public class File2TableMapReduceTool implements Tool {

    // Default input file, used when no path argument is supplied.
    private static final String DEFAULT_INPUT = "hdfs://hadoop101:9000/user.csv";

    // Configuration injected by ToolRunner via setConf(); the original left
    // setConf() empty and returned null from getConf(), breaking the Tool
    // contract and losing any -D options passed on the command line.
    private Configuration conf;

    /**
     * Configures and runs a job that loads a CSV file into the user table.
     *
     * @param args optional: args[0] overrides the default input path
     * @return SUCCEEDED/FAILED job-status value
     */
    public int run(String[] args) throws Exception {
        // Use the injected configuration so command-line settings apply.
        Job job = (getConf() == null) ? Job.getInstance() : Job.getInstance(getConf());
        job.setJarByClass(File2TableMapReduceTool.class);
        // The input path is no longer hard-coded: take it from args when given.
        Path path = new Path(args != null && args.length > 0 ? args[0] : DEFAULT_INPUT);
        FileInputFormat.addInputPath(job, path);
        // Mapper: parse CSV lines into Puts.
        job.setMapperClass(ReadDataFromFileMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        // Reducer: write the Puts into the target table.
        TableMapReduceUtil.initTableReducerJob(
                "user",
                WriteDataToTableMRReducer.class,
                job
        );
        boolean b = job.waitForCompletion(true);
        return b ? JobStatus.State.SUCCEEDED.getValue() : JobStatus.State.FAILED.getValue();
    }

    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    public Configuration getConf() {
        return conf;
    }
}
ReadDataFromFileMapper
public class ReadDataFromFileMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    /**
     * Parses one CSV line "rowkey,name,age" into a Put on the info family.
     * Lines with fewer than three fields (blank or malformed rows) are
     * skipped; the original threw ArrayIndexOutOfBoundsException on them
     * and failed the whole task.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        // Guard against blank or malformed lines.
        if (fields.length < 3) {
            return;
        }
        String rowkey = fields[0];
        String name = fields[1];
        String age = fields[2];
        // The output key carries the rowkey bytes.
        ImmutableBytesWritable rowkeyWritable = new ImmutableBytesWritable(Bytes.toBytes(rowkey));
        Put put = new Put(Bytes.toBytes(rowkey));
        // family:qualifier -> value
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(name));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(age));
        context.write(rowkeyWritable, put);
    }
}
WriteDataToTableMRReducer
public class WriteDataToTableMRReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    /**
     * Pass-through reducer: forwards every Put received for a rowkey
     * straight to the output table unchanged.
     */
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // The output key is ignored by the table output format.
        NullWritable outKey = NullWritable.get();
        for (Put rowPut : values) {
            context.write(outKey, rowPut);
        }
    }
}
接下来是打可运行jar包及运行,打包步骤及运行方法详见上一个示例。