一、Hbase架构
1、简单整体架构
![](https://i-blog.csdnimg.cn/blog_migrate/3aecb68156253e1d077d68068a710302.png)
2、hbase内部架构
![](https://i-blog.csdnimg.cn/blog_migrate/5c1604400d24e2cc4639e0cf750fba04.png)
二、hbase读写文件流程
1、写流程
![](https://i-blog.csdnimg.cn/blog_migrate/a4491fb0081e30c460d09574f37ac19d.png)
2、读流程
![](https://i-blog.csdnimg.cn/blog_migrate/a2e26137dd8dee8847486516437df7a1.png)
三、HbaseAPI编程
1、创建命名空间
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import java.io.IOException;
public class Code02CreateNamespace {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    static {
        // Configuration carries the ZooKeeper quorum used to locate the HBase cluster.
        // NOTE(review): HBaseConfiguration.create() is the documented factory and also
        // loads hbase-default/hbase-site settings; plain new Configuration() relies on
        // the explicitly set keys only — confirm this is intentional.
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1,node2,master");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Creates the "jan" namespace in HBase.
     *
     * @throws IOException if the namespace creation or cluster communication fails
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources guarantees both Admin and Connection are closed even when
        // createNamespace throws (the original leaked both on failure).
        try (Connection conn = connection;
             Admin admin = conn.getAdmin()) {
            NamespaceDescriptor namespace = NamespaceDescriptor.create("jan").build();
            admin.createNamespace(namespace);
        }
    }
}
2、创建表并给定多个列族
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import java.io.IOException;
public class Code04CreateTable {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    static {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Creates a table with one or more column families.
     *
     * @param tableName   table name, optionally namespace-qualified (e.g. "jan:tbl2")
     * @param familyNames one column-family name per vararg
     * @throws IOException if table creation fails
     */
    public static void createtable(String tableName, String... familyNames) throws IOException {
        // TableName wraps the String name; HTableDescriptor is what createTable expects.
        HTableDescriptor hTableDescriptor = new HTableDescriptor(TableName.valueOf(tableName));
        for (String familyName : familyNames) {
            hTableDescriptor.addFamily(new HColumnDescriptor(familyName));
        }
        // Bug fix: the original closed the shared static connection here, so any second
        // call to this helper failed. Only the Admin is closed; try-with-resources also
        // closes it when createTable throws.
        try (Admin admin = connection.getAdmin()) {
            admin.createTable(hTableDescriptor);
        }
    }

    public static void main(String[] args) throws IOException {
        try {
            createtable("jan:tbl2", "info1", "info2");
        } finally {
            // The connection is owned by main and closed exactly once, here.
            connection.close();
        }
    }
}
3、删除表
/**
 * Checks whether the given table (optionally namespace-qualified) exists.
 *
 * @param tableName table name, e.g. "jan:tbl2"
 * @return true when the table exists
 * @throws IOException if the cluster cannot be reached
 */
public static boolean exists(String tableName) throws IOException {
    // `admin` is a field declared elsewhere in the enclosing class.
    return admin.tableExists(TableName.valueOf(tableName));
}
/**
 * Deletes a table. HBase requires a table to be disabled before it can be deleted,
 * so the disable must run first — the order of the two calls is significant.
 *
 * @param tableName table name, e.g. "jan:tbl2"
 * @throws IOException if the disable or delete fails
 */
public static void deleteTable(String tableName) throws IOException {
    TableName target = TableName.valueOf(tableName);
    admin.disableTable(target);
    admin.deleteTable(target);
}
// Deletes the table only when it actually exists.
public static void main(String[] args) throws IOException {
    String tableName = "jan:tbl2";
    if (!exists(tableName)) {
        System.err.println("表不存在,请重新输入...");
        // Bug fix: the original fell through and still called deleteTable on a
        // missing table, which throws; bail out after reporting.
        return;
    }
    deleteTable(tableName);
}
4、给表中put数据
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class Code07PutData {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    static {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Writes one row ("1002") with four columns into table jan:tbl1.
     *
     * @throws IOException if the put or cluster communication fails
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes Table and Connection even when table.put throws
        // (the original leaked both on failure).
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"))) {
            // A Put is keyed by the row key, passed as a byte array.
            Put put = new Put(Bytes.toBytes("1002"));
            // Each addColumn takes (family, qualifier, value).
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("zhangsan"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes("18"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes("man"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("clazz"), Bytes.toBytes("1"));
            table.put(put);
        }
    }
}
工作中常用IO流
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
//工作中推荐使用如下方法:
public class Code13PutData {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    /** Flush the buffered puts to HBase once this many have accumulated. */
    private static final int BATCH_SIZE = 100;

    static {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Bulk-loads students.txt into jan:tbl1, batching puts 100 at a time so each
     * RPC carries many rows instead of one.
     *
     * Expected line format: rowKey,name,age,gender,clazz (comma-separated).
     *
     * @throws IOException if reading the file or writing to HBase fails
     */
    public static void main(String[] args) throws IOException {
        // Bug fix: the original never closed the BufferedReader (and leaked the
        // Table/Connection on any exception); try-with-resources closes all three.
        // NOTE(review): FileReader uses the platform default charset — confirm the
        // input file's encoding matches.
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"));
             BufferedReader bufferedReader =
                     new BufferedReader(new FileReader("D:\\CodeSpace\\hbasedemo15\\data\\students.txt"))) {
            List<Put> putList = new ArrayList<>(BATCH_SIZE);
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                String[] columns = line.split(",");
                Put put = new Put(Bytes.toBytes(columns[0]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(columns[1]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(columns[2]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes(columns[3]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("clazz"), Bytes.toBytes(columns[4]));
                putList.add(put);
                // Flush a full batch, then clear the buffer so rows are not re-sent.
                if (putList.size() >= BATCH_SIZE) {
                    table.put(putList);
                    putList.clear();
                }
            }
            // Flush the final partial batch, if any.
            if (!putList.isEmpty()) {
                table.put(putList);
            }
        }
    }
}
5、get表中的数据
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
//该样例中的方法只能获取部分列数据,列的信息需要手动给定
public class Code08GetData {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    static {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Fetches row "1001" from jan:tbl1 and prints a few hand-picked columns.
     * Only the columns named below are read; unknown columns require a scan.
     *
     * @throws IOException if the get or cluster communication fails
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes Table and Connection even when table.get throws
        // (the original leaked both on failure).
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"))) {
            Get get = new Get(Bytes.toBytes("1001"));
            Result result = table.get(get);
            // getValue(family, qualifier) returns null for absent cells;
            // Bytes.toString(null) yields null rather than throwing.
            String rowKey = Bytes.toString(result.getRow());
            String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));
            String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));
            String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));
            System.out.println("rowKey =>" + rowKey + " " + name + ":" + age + ":" + gender);
        }
    }
}
6、delete表中数据
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class Code09deleteData {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    static {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Deletes the entire row "1001" (all column families/versions) from jan:tbl1.
     *
     * @throws IOException if the delete or cluster communication fails
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes Table and Connection even when table.delete
        // throws (the original leaked both on failure).
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"))) {
            Delete delete = new Delete(Bytes.toBytes("1001"));
            table.delete(delete);
        }
    }
}
7、scan获取表中多列数据
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class Code11ScanData {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    static {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Scans every row of jan:tbl1 (up to 3 versions per cell) and prints each
     * cell's row key, family, qualifier, and value.
     *
     * @throws IOException if the scan or cluster communication fails
     */
    public static void main(String[] args) throws IOException {
        Scan scan = new Scan();
        scan.setMaxVersions(3);
        // Bug fix: the original never closed the ResultScanner (which holds a lease
        // on the region server) and leaked Table/Connection on exception.
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"));
             ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                String rowKey = Bytes.toString(result.getRow());
                // rawCells() returns every cell of the current row.
                for (Cell cell : result.rawCells()) {
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    // Bug fix: the original output ran "Qualifier" and "Value:" straight
                    // into the data with no separators, producing garbled lines.
                    System.out.println("rowKey:" + rowKey + " Family:" + family
                            + " Qualifier:" + qualifier + " Value:" + value);
                }
            }
        }
    }
}