Hbase

最新推荐文章于 2024-10-08 13:49:36 发布

挣他一个亿￥

最新推荐文章于 2024-10-08 13:49:36 发布

阅读量1.1k

点赞数

文章标签： hive spark big data

本文链接：https://blog.csdn.net/m0_49142509/article/details/121445718

版权

1.是什么?

hbase是一个数据模型，提供对海量数据的随机实时读/写访问。
构建在hadoop之hdfs之上，分布式面向列的数据库,有hdfs的分块存储、冗余、容错的优良特性

产生原因：
mapreduce/hdfs/hive等适合做批量数据处理，且只能以顺序方式访问数据；即使只查找一条记录也要扫描整个数据集，因此需要HBase这样支持随机实时读写的存储系统。

2.重要概念

表，以"表"为单位组织数据，表由多行组成
行，行由一个RowKey和多个列族组成，一行一个RowKey为唯一标识。
列族，每一行由若干列族组成，每个列族下可包含多个列、列族是列共性的一些体现。例如（baseInfo（name,age,gender），addressInfo（province，city, email））
列限定符，列由列族和列限定符唯一指定，像如上的name、age即是baseInfo列族的列限定符
单元格，由RowKey、列族、列限定符唯一定位，单元格之中存放一个值和一个版本号
时间戳，即为版本号来标识插入或是修改时间，倒序排，最新的数据排在最前面

结构说明：
表是行的集合
行是列族的集合
列族是列的集合
列是键值对的集合

3.架构设计

在这里插入图片描述

HMaster 管理结点，管理表的创建、删除、修改等DDL操作（数据读写不经过HMaster）、负载均衡并调整Region分布
Zookeeper 协调服务，保存集群状态以及元数据表(hbase:meta)所在位置，客户端通过它定位要访问的RegionServer
HRegionServer 实际工作结点，

负责Region读写的I/O请求、
切分本地Region，当StoreFile大小超过阈值，把当前Region切分成2个Region

HRegion 表中一段连续RowKey范围内的行的集合，是分布式存储和负载均衡的基本单位，相当于hdfs中block
Store 对应一个列族，由一个MemStore和若干StoreFile组成
MemStore 内存写缓冲，数据先写内存，写满后再刷写到磁盘形成StoreFile；StoreFile 数量足够多时会合并存储
HLog 预写日志(WAL)，写数据前先写日志，做灾难恢复使用，保证数据不丢失
DFS Client,通过它往HDFS保存

4.shell操作Hbase

. hbase shell 进入
. status 查看集群状态、version 版本、whoami 用户是谁

DDL操作

创建表 create 'Student','base_info','advanced_info' （表名为Student，列族为base_info和advanced_info）
list 列出所有表
disable 禁用表 is_disabled 是否被禁用
enable 启用一张表 is_enabled 是否被启用
describe 查看表的描述
加入新列族
alter：修改表的结构 alter ‘Student’,‘private_info’ 加入private_info
exists：验证表是否存在 exists ‘Student’
drop：删除表，表需先禁用，然后才能删除：先执行 disable 'Student'，再执行 drop 'Student'
disable_all：禁用多个表

DML操作

插入数据 put 'Student','r1','base_info:username','zhangsan' 向当前空间下表Student中rowkey为r1的行、列族base_info下的username列写入值zhangsan
读取数据三种方式
- scan遍历全表 scan ‘table_name’
- scan范围查询 scan ‘Student’, { LIMIT=> 2,STARTROW => ‘r1’,ENDROW=>‘r2’}
- get按rowKey查询 get “table_name”,“rowid”
- 获取行中指定的列数据 get ‘Student’,‘r1’,‘base_info:username’
删除指定条件的列数据 delete ‘Student’,‘r1’,‘base_info:username’
清空表Truncate(只是清空数据) truncate ‘Student’
查看当前空间(数据库)下的所有表 list_namespace_tables 'default'
修改列族的版本号个数 alter ‘Student4Job008’,NAME=>‘baseInfo’,VERSIONS=>2
查看表数据的指定版本个数 scan ‘Student4Job008’,{VERSIONS => 4}
查看表数据所有版本对应的数据(RAW模式，包含已标记删除的数据) scan 'tablename',{RAW=>true,VERSIONS => versionNumber}
创建命名空间 create_namespace ‘job007’
list_namespace：查看所有命名空间
指定的命名空间下创建表 create ‘job015:Student’,‘base_info’

5.Java操作Hbase

maven:

<dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>1.1.2</version>
        <scope>provided</scope>
</dependency>

java操作hbase代码案例

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Minimal example of driving HBase from Java: create or delete a table, write a batch of
 * rows, and read some of them back.
 *
 * <p>A single {@link Connection} is opened in the constructor and shared by every method;
 * call {@link #cleanUp()} when finished. The {@link Admin} and {@link Table} handles obtained
 * inside each method are closed before that method returns.
 *
 * @author 天亮教育
 */
public class HBaseOperator {
    // Connector object used to talk to HBase, comparable to a JDBC Connection for MySQL.
    public Connection connection;
    // HBaseConfiguration.create() automatically loads hbase-site.xml from the application
    // classpath. NOTE: this object is static, so the settings applied in the constructor are
    // shared by every HBaseOperator instance.
    public static Configuration configuration = HBaseConfiguration.create();

    /**
     * Applies the ZooKeeper settings to the shared configuration and opens the connection.
     *
     * @throws Exception if the connection cannot be created
     */
    public HBaseOperator() throws Exception {
        // new HBaseAdmin(configuration) is deprecated; Admin is obtained from the connection.
        configuration.set("hbase.zookeeper.quorum",
                "192.168.1.34,192.168.1.31,192.168.1.32,192.168.1.41");
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        configuration.set("zookeeper.znode.parent", "/hbase-unsecure");
        connection = ConnectionFactory.createConnection(configuration);
    }

    /**
     * Creates a table with the given column families unless a table of that name exists,
     * in which case a message is printed and nothing is changed.
     *
     * @param tablename name of the table to create
     * @param cf1 names of the column families to add to the table
     * @throws Exception if any HBase call fails
     */
    public void createTable(String tablename, String... cf1) throws Exception {
        TableName tname = TableName.valueOf(tablename);
        // try-with-resources: the Admin handle is released on every exit path,
        // including the early return below.
        try (Admin admin = connection.getAdmin()) {
            if (admin.tableExists(tname)) {
                System.out.println("表" + tablename + "已存在");
                return;
            }
            // Build the descriptor only once we know the table has to be created.
            HTableDescriptor tDescriptor = new HTableDescriptor(tname);
            for (String cf : cf1) {
                tDescriptor.addFamily(new HColumnDescriptor(cf));
            }
            admin.createTable(tDescriptor);
            System.out.println("表" + tablename + "创建成功");
        }
    }

    /**
     * Deletes the named table if it exists; otherwise prints a message.
     *
     * @param tablename name of the table to delete
     * @throws Exception if any HBase call fails
     */
    public void deleteTable(String tablename) throws Exception {
        TableName tName = TableName.valueOf(tablename);
        try (Admin admin = connection.getAdmin()) {
            if (!admin.tableExists(tName)) {
                System.out.println("表" + tablename + "不存在。");
                return;
            }
            // A table must be disabled before it can be deleted, but disabling a table that
            // is already disabled is an error, so only disable when it is still enabled.
            if (admin.isTableEnabled(tName)) {
                admin.disableTable(tName);
            }
            admin.deleteTable(tName);
            System.out.println("删除表" + tablename + "成功！");
        }
    }

    /**
     * Writes ten sample rows ({@code rowkey_0} .. {@code rowkey_9}) to the table in one batch.
     * Each row gets {@code user:username}, {@code user:age}, {@code user:birthday},
     * {@code content:phone} and {@code content:email}.
     *
     * @param table_name name of the table to write to
     * @throws Exception if any HBase call fails
     */
    public void putData(String table_name) throws Exception {
        TableName tableName = TableName.valueOf(table_name);
        Random random = new Random();
        List<Put> batPut = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            // The Put constructor argument is the row key. Bytes converts between Java
            // primitive/String values and byte arrays.
            Put put = new Put(Bytes.toBytes("rowkey_" + i));
            put.addColumn(Bytes.toBytes("user"), Bytes.toBytes("username"),
                    Bytes.toBytes("user_" + i));
            // Age is a random value in 1..50, stored as its decimal string.
            put.addColumn(Bytes.toBytes("user"), Bytes.toBytes("age"),
                    Bytes.toBytes(String.valueOf(random.nextInt(50) + 1)));
            // Sample data only: for i = 0 this yields "20170001" (month 00).
            put.addColumn(Bytes.toBytes("user"), Bytes.toBytes("birthday"),
                    Bytes.toBytes("20170" + i + "01"));
            put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("phone"),
                    Bytes.toBytes("电话_" + i));
            put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("email"),
                    Bytes.toBytes("email_" + i));
            // Collected for a single batch write instead of one table.put(put) per row.
            batPut.add(put);
        }
        try (Table table = connection.getTable(tableName)) {
            table.put(batPut);
        }
        System.out.println("表插入数据成功！");
    }

    /**
     * Reads rows {@code rowkey_0} .. {@code rowkey_4} in one batch and prints every cell as
     * row key, column family, qualifier and value.
     *
     * @param table_Name name of the table to read from
     * @throws Exception if any HBase call fails
     */
    public void getData(String table_Name) throws Exception {
        TableName tableName = TableName.valueOf(table_Name);
        List<Get> gets = new ArrayList<>();
        for (int i = 0; i < 5; i++) {
            gets.add(new Get(Bytes.toBytes("rowkey_" + i)));
        }
        try (Table table = connection.getTable(tableName)) {
            Result[] results = table.get(gets);
            for (Result result : results) {
                // Walk the cells of this row through the Result's CellScanner.
                CellScanner cellScanner = result.cellScanner();
                while (cellScanner.advance()) {
                    Cell cell = cellScanner.current();
                    // CellUtil copies each component of the cell into its own byte array.
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualify = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String rowkey = Bytes.toString(CellUtil.cloneRow(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println("rowkey:" + rowkey + ",columnfamily:"
                            + family + ",qualify:" + qualify + ",value:" + value);
                }
            }
        }
    }

    /**
     * Closes the shared connection, if one was opened.
     *
     * @throws Exception if closing the connection fails
     */
    public void cleanUp() throws Exception {
        if (connection != null) {
            connection.close();
        }
    }

    /**
     * Demo entry point: creates table {@code zel}, writes sample rows and reads them back.
     *
     * @param args unused
     * @throws Exception if any HBase call fails
     */
    public static void main(String[] args) throws Exception {
        HBaseOperator hbaseOperator = new HBaseOperator();
        try {
            hbaseOperator.createTable("zel", "user", "content");
            hbaseOperator.putData("zel");
            hbaseOperator.getData("zel");
        } finally {
            // Release the connection even when one of the steps above throws.
            hbaseOperator.cleanUp();
        }
    }
}