1.运行环境
HBase 的安装文件为:hbase-2.4.5-bin.tar.gz,相关配置信息可以查看《HBase-2.4.5 单机版standalone模式安装配置》,其他环境如下:
# 只贴出版本信息
[root@tcloud ~]# hadoop version
Hadoop 3.1.3
[root@tcloud ~]# java -version
java version "1.8.0_251"
2.基本命令
# 1.打开 HBase Shell
[root@tcloud ~]# hbase shell
# 无关信息不再贴出
HBase Shell
Use "help" to get list of supported commands.
Use "exit" to quit this interactive shell.
For Reference, please visit: http://hbase.apache.org/2.0/book.html#shell
Version 2.4.5, r03b8c0cf426cbae3284225b73040ec574d5bac34, Tue Jul 27 09:44:16 PDT 2021
Took 0.0019 seconds
hbase:001:0>
# 2.获取帮助
# 信息较多这里不再贴出
hbase:001:0> help
# 3.获取命令的详细信息
hbase:002:0> help 'status'
Show cluster status. Can be 'summary', 'simple', 'detailed', or 'replication'. The
default is 'summary'. Examples:
hbase> status
hbase> status 'simple'
hbase> status 'summary'
hbase> status 'detailed'
hbase> status 'replication'
hbase> status 'replication', 'source'
hbase> status 'replication', 'sink'
# 4.查看服务器状态【 跟 status 'summary' 等价 】
hbase:003:0> status
1 active master, 0 backup masters, 1 servers, 0 dead, 0.0000 average load
Took 1.2175 seconds
hbase:004:0> status 'simple'
active master: tcloud:16000 1631693245401
0 backup masters
1 live servers
tcloud:16020 1631693250043
requestsPerSecond=0.0, numberOfOnlineRegions=3, usedHeapMB=30, maxHeapMB=451,
numberOfStores=5, numberOfStorefiles=7, storefileUncompressedSizeMB=0,
storefileSizeMB=0, memstoreSizeMB=0, storefileIndexSizeKB=0,
readRequestsCount=13, filteredReadRequestsCount=2, writeRequestsCount=4,
rootIndexSizeKB=0, totalStaticIndexSizeKB=0, totalStaticBloomSizeKB=0,
totalCompactingKVs=4, currentCompactedKVs=4, compactionProgressPct=1.0,
coprocessors=[MultiRowMutationEndpoint]
0 dead servers
Aggregate load: 0, regions: 3
Took 1.1800 seconds
# 5.查看版本信息
hbase:005:0> version
2.4.5, r03b8c0cf426cbae3284225b73040ec574d5bac34, Tue Jul 27 09:44:16 PDT 2021
Took 0.0003 seconds
3.表操作
# 1.查看所有表
hbase:006:0> list
TABLE
teacher
1 row(s)
Took 0.0095 seconds
=> ["teacher"]
# 2.创建表
# 命令格式【create '表名称', '列族名称 1','列族名称 2','列族名称 N'】
# 创建一张名为student的表,包含家庭信息(familyInfo)、学校信息(schoolInfo)两个列族
hbase:007:0> create 'student','familyInfo','schoolInfo'
Created table student
Took 0.8689 seconds
=> Hbase::Table - student
# 3.查看表的基本信息
# 命令格式【 describe '表名' 】,也可以使用简写【 desc '表名' 】
hbase:008:0> describe 'student'
Table student is ENABLED
student
COLUMN FAMILIES DESCRIPTION
{NAME => 'familyInfo', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1',
KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
TTL => 'FOREVER', MIN_VERSIONS
=> '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
{NAME => 'schoolInfo', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1',
KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
TTL => 'FOREVER', MIN_VERSIONS
=> '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
2 row(s)
Quota is disabled
Took 0.2976 seconds
# 4.表的启用/禁用及启用/禁用状态查询
# enable 和 disable 可以启用/禁用这个表
# is_enabled 和 is_disabled 用来检查表是否处于启用/禁用状态【这两个命令使用一个就够了】
# 禁用表【这里不再贴出 Took xxx seconds 等无用或冗余信息】
hbase:009:0> disable 'student'
hbase:010:0> is_disabled 'student'
true
hbase:011:0> is_enabled 'student'
false
# 启用表【这里不再贴出 Took xxx seconds 等无用或冗余信息】
hbase:012:0> enable 'student'
hbase:013:0> is_disabled 'student'
false
hbase:014:0> is_enabled 'student'
true
# 5.检查表是否存在【HBase大小写敏感】
hbase:015:0> exists 'student'
Table student does exist
Took 0.0128 seconds
=> true
hbase:016:0> exists 'Student'
Table Student does not exist
Took 0.0071 seconds
=> false
# 6.删除表【需要先禁用表】
# 禁用表
hbase:017:0> disable 'student'
Took 0.3681 seconds
# 删除表
hbase:018:0> drop 'student'
Took 0.6911 seconds
4.数据增删改
上一节表操作的最后已将 student 表删除,这里需要先重建表:create 'student','familyInfo','schoolInfo'
# 1.添加列族
# 命令格式【 alter '表名', '列族名' 】
hbase:019:0> alter 'student', 'teacherInfo'
Updating all regions with the new schema...
1/1 regions updated.
Done.
Took 1.7037 seconds
# 查看一下列族信息【发现已经添加成功了】
hbase:020:0> desc 'student'
Table student is ENABLED
student
COLUMN FAMILIES DESCRIPTION
{NAME => 'familyInfo', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1',
KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
TTL => 'FOREVER', MIN_VERSIONS
=> '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
{NAME => 'schoolInfo', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1',
KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
TTL => 'FOREVER', MIN_VERSIONS
=> '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
{NAME => 'teacherInfo', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1',
KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
TTL => 'FOREVER', MIN_VERSIONS
=> '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
3 row(s)
Quota is disabled
Took 0.0465 seconds
# 2.删除列族(NAME METHOD 必须大写)
# 命令格式【 alter '表名', {NAME => '列族名', METHOD => 'delete'} 】
hbase:021:0> alter 'student', {NAME => 'familyInfo', METHOD => 'delete'}
Updating all regions with the new schema...
1/1 regions updated.
Done.
Took 1.6080 seconds
# 删除一个不存在的列族测试一下【居然没有报错】
hbase:022:0> alter 'student', {NAME => 'classInfo', METHOD => 'delete'}
Updating all regions with the new schema...
1/1 regions updated.
Done.
Took 1.6146 seconds
# 3.更改列族存储版本的限制
# 默认列族只存储一个版本的数据,如果需要存储多个版本的数据,则需要修改列族的属性。
hbase:023:0> alter 'student',{NAME => 'teacherInfo', VERSIONS => 3}
# 查看一下表信息【这里只贴出列族 teacherInfo 的信息,可以看到版本信息已经变为3】
hbase:024:0> desc 'student'
Table student is ENABLED
student
COLUMN FAMILIES DESCRIPTION
{NAME => 'teacherInfo', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '3',
KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
TTL => 'FOREVER', MIN_VERSIONS
=> '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
# 4.插入数据
# 命令格式【 put '表名', '行键','列族:列','值' 】
# 如果新增数据的行键、列族、列与原有数据完全相同,则相当于更新操作【这里不再贴出 Took 信息】
put 'student', 'rowkey1','teacherInfo:name','tom'
put 'student', 'rowkey1','teacherInfo:birthday','1970-01-09'
put 'student', 'rowkey1','teacherInfo:age','51'
put 'student', 'rowkey1','schoolInfo:name','Harvard University'
put 'student', 'rowkey1','schoolInfo:localtion','Boston'
put 'student', 'rowkey2','teacherInfo:name','jack'
put 'student', 'rowkey2','teacherInfo:birthday','1968-08-22'
put 'student', 'rowkey2','teacherInfo:age','53'
put 'student', 'rowkey2','schoolInfo:name','Yale University'
put 'student', 'rowkey2','schoolInfo:localtion','New Haven'
put 'student', 'rowkey3','teacherInfo:name','maike'
put 'student', 'rowkey3','teacherInfo:birthday','1975-01-22'
put 'student', 'rowkey3','teacherInfo:age','46'
put 'student', 'rowkey3','schoolInfo:name','MIT'
put 'student', 'rowkey3','schoolInfo:localtion','New Haven'
# 5.获取指定行、指定行中的列族、列的信息
# 获取指定行中所有列的数据信息
hbase:040:0> get 'student','rowkey1'
COLUMN CELL
schoolInfo:localtion timestamp=2021-09-15T17:00:15.072, value=Boston
schoolInfo:name timestamp=2021-09-15T17:00:15.019, value=Harvard University
teacherInfo:age timestamp=2021-09-15T17:00:14.962, value=51
teacherInfo:birthday timestamp=2021-09-15T17:00:14.914, value=1970-01-09
teacherInfo:name timestamp=2021-09-15T17:00:14.868, value=tom
1 row(s)
Took 0.1176 seconds
# 获取指定行中指定列族下所有列的数据信息
hbase:041:0> get 'student','rowkey1','schoolInfo'
COLUMN CELL
schoolInfo:localtion timestamp=2021-09-15T17:00:15.072, value=Boston
schoolInfo:name timestamp=2021-09-15T17:00:15.019, value=Harvard University
1 row(s)
Took 0.0150 seconds
# 获取指定行中指定列族指定列的数据信息
hbase:042:0> get 'student','rowkey3','schoolInfo:name'
COLUMN CELL
schoolInfo:name timestamp=2021-09-15T17:00:15.416, value=MIT
1 row(s)
Took 0.0192 seconds
# 6.删除指定行中指定列族下的某一列【只保留主要信息】;delete 命令无法删除整个列族或整行(删除整行可使用 deleteall)
hbase:043:0> delete 'student','rowkey3','teacherInfo:age'
hbase:044:0> get 'student','rowkey3'
COLUMN CELL
schoolInfo:localtion timestamp=2021-09-15T17:00:17.554, value=New Haven
schoolInfo:name timestamp=2021-09-15T17:00:15.416, value=MIT
teacherInfo:birthday timestamp=2021-09-15T17:00:15.358, value=1975-01-22
teacherInfo:name timestamp=2021-09-15T17:00:15.337, value=maike
5.查询
HBase 中访问数据有两种基本的方式:
- 按指定 rowkey 获取数据:get 方法;
- 按指定条件获取数据:scan 方法。
scan 可以设置 begin 和 end 参数来访问一个范围内所有的数据。get 本质上就是 begin 和 end 相等的一种特殊的 scan。
# 1.Get查询
# 获取指定行中所有列的数据信息
hbase:045:0> get 'student','rowkey2'
COLUMN CELL
schoolInfo:localtion timestamp=2021-09-15T17:00:15.318, value=New Haven
schoolInfo:name timestamp=2021-09-15T17:00:15.287, value=Yale University
teacherInfo:age timestamp=2021-09-15T17:00:15.272, value=53
teacherInfo:birthday timestamp=2021-09-15T17:00:15.223, value=1968-08-22
teacherInfo:name timestamp=2021-09-15T17:00:15.175, value=jack
1 row(s)
Took 0.0131 seconds
# 获取指定行中指定列族下所有列的数据信息
hbase:046:0> get 'student','rowkey2','teacherInfo'
COLUMN CELL
teacherInfo:age timestamp=2021-09-15T17:00:15.272, value=53
teacherInfo:birthday timestamp=2021-09-15T17:00:15.223, value=1968-08-22
teacherInfo:name timestamp=2021-09-15T17:00:15.175, value=jack
1 row(s)
Took 0.0092 seconds
# 获取指定行中指定列的数据信息
hbase:047:0> get 'student','rowkey2','teacherInfo:name'
COLUMN CELL
teacherInfo:name timestamp=2021-09-15T17:00:15.175, value=jack
1 row(s)
Took 0.0066 seconds
# 2.查询整表数据
hbase:048:0> scan 'student'
ROW COLUMN+CELL
rowkey1 column=schoolInfo:localtion, timestamp=2021-09-15T17:00:15.072, value=Boston
rowkey1 column=schoolInfo:name, timestamp=2021-09-15T17:00:15.019, value=Harvard University
rowkey1 column=teacherInfo:age, timestamp=2021-09-15T17:00:14.962, value=51
rowkey1 column=teacherInfo:birthday, timestamp=2021-09-15T17:00:14.914, value=1970-01-09
rowkey1 column=teacherInfo:name, timestamp=2021-09-15T17:00:14.868, value=tom
rowkey2 column=schoolInfo:localtion, timestamp=2021-09-15T17:00:15.318, value=New Haven
rowkey2 column=schoolInfo:name, timestamp=2021-09-15T17:00:15.287, value=Yale University
rowkey2 column=teacherInfo:age, timestamp=2021-09-15T17:00:15.272, value=53
rowkey2 column=teacherInfo:birthday, timestamp=2021-09-15T17:00:15.223, value=1968-08-22
rowkey2 column=teacherInfo:name, timestamp=2021-09-15T17:00:15.175, value=jack
rowkey3 column=schoolInfo:localtion, timestamp=2021-09-15T17:00:17.554, value=New Haven
rowkey3 column=schoolInfo:name, timestamp=2021-09-15T17:00:15.416, value=MIT
3 row(s)
Took 0.0392 seconds
# 3.查询指定列族的数据
hbase:049:0> scan 'student', {COLUMN=>'teacherInfo'}
ROW COLUMN+CELL
rowkey1 column=teacherInfo:age, timestamp=2021-09-15T17:00:14.962, value=51
rowkey1 column=teacherInfo:birthday, timestamp=2021-09-15T17:00:14.914, value=1970-01-09
rowkey1 column=teacherInfo:name, timestamp=2021-09-15T17:00:14.868, value=tom
rowkey2 column=teacherInfo:age, timestamp=2021-09-15T17:00:15.272, value=53
rowkey2 column=teacherInfo:birthday, timestamp=2021-09-15T17:00:15.223, value=1968-08-22
rowkey2 column=teacherInfo:name, timestamp=2021-09-15T17:00:15.175, value=jack
2 row(s)
Took 0.0495 seconds
# 4.条件查询
# 查询指定列的数据
hbase:050:0> scan 'student', {COLUMNS=> 'teacherInfo:name'}
ROW COLUMN+CELL
rowkey1 column=teacherInfo:name, timestamp=2021-09-15T17:00:14.868, value=tom
rowkey2 column=teacherInfo:name, timestamp=2021-09-15T17:00:15.175, value=jack
2 row(s)
Took 0.0141 seconds
# 除了列(COLUMNS)修饰词外,还支持 LIMIT(限制查询结果行数)、STARTROW(起始 rowkey,会先根据这个 key 定位到 region,再向后扫描)、STOPROW(结束行)、TIMERANGE(限定时间戳范围)、VERSIONS(版本数)和 FILTER(按条件过滤行)等。
# 如下代表从 rowkey1 这个 rowkey 开始,向后查找两行的最新 3 个版本的 name 列的数据:
hbase:051:0> scan 'student', {COLUMNS=> 'teacherInfo:name',STARTROW => 'rowkey1',STOPROW =>'wrowkey3',LIMIT=>2, VERSIONS=>3}
ROW COLUMN+CELL
rowkey1 column=teacherInfo:name, timestamp=2021-09-15T17:00:14.868, value=tom
rowkey2 column=teacherInfo:name, timestamp=2021-09-15T17:00:15.175, value=jack
2 row(s)
Took 0.0345 seconds
# 5.条件过滤
hbase:052:0> help "scan"
The filter can be specified in two ways:
1. Using a filterString - more information on this is available in the
Filter Language document attached to the HBASE-4176 JIRA
2. Using the entire package name of the filter.
Some examples:
hbase> scan 'hbase:meta'
hbase> scan 'hbase:meta', {COLUMNS => 'info:regioninfo'}
hbase> scan 'ns1:t1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
hbase> scan 't1', {COLUMNS => 'c1', TIMERANGE => [1303668804000, 1303668904000]}
hbase> scan 't1', {REVERSED => true}
hbase> scan 't1', {ALL_METRICS => true}
hbase> scan 't1', {METRICS => ['RPC_RETRIES', 'ROWS_FILTERED']}
hbase> scan 't1', {ROWPREFIXFILTER => 'row2', FILTER => "
(QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))"}
hbase> scan 't1', {FILTER =>
org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}
hbase> scan 't1', {CONSISTENCY => 'TIMELINE'}
hbase> scan 't1', {ISOLATION_LEVEL => 'READ_UNCOMMITTED'}
hbase> scan 't1', {MAX_RESULT_SIZE => 123456}
# 6.版本查询
# 查询表的历史版本
hbase:053:0> create 'teacher', { NAME=> 'username', VERSIONS=> 5 }
Created table teacher
Took 0.8262 seconds
=> Hbase::Table - teacher
## 这里不再贴出 Took xxx seconds
hbase:014:0> put 'teacher','1','username','yuanzhengme'
hbase:015:0> put 'teacher','1','username','yuanzhengm'
hbase:016:0> put 'teacher','1','username','yuanzheng'
hbase:017:0> put 'teacher','1','username','yuanzhen'
hbase:018:0> put 'teacher','1','username','yuanzhe'
hbase:019:0> put 'teacher','1','username','yuanzh'
hbase:020:0> put 'teacher','1','username','yuanz'
hbase:021:0> put 'teacher','1','username','yuan'
## 简单查询 只显示最新数据
hbase:054:0> get 'teacher','1'
COLUMN CELL
username: timestamp=2021-09-14T14:47:03.841, value=yuan
1 row(s)
Took 0.9945 seconds
## 查询版本
hbase:055:0> get 'teacher', '1', { COLUMN=>'username', VERSIONS=>5 }
COLUMN CELL
username: timestamp=2021-09-14T14:47:03.841, value=yuan
username: timestamp=2021-09-14T14:47:02.285, value=yuanz
username: timestamp=2021-09-14T14:47:02.154, value=yuanzh
username: timestamp=2021-09-14T14:47:02.103, value=yuanzhe
username: timestamp=2021-09-14T14:47:02.026, value=yuanzhen
1 row(s)
Took 0.0193 seconds
## 查询版本【请求的版本数超出列族设置的 VERSIONS(5)时,最多只返回 5 个版本】
hbase:056:0> get 'teacher', '1', { COLUMN=> 'username', VERSIONS=> 6 }
COLUMN CELL
username: timestamp=2021-09-14T14:47:03.841, value=yuan
username: timestamp=2021-09-14T14:47:02.285, value=yuanz
username: timestamp=2021-09-14T14:47:02.154, value=yuanzh
username: timestamp=2021-09-14T14:47:02.103, value=yuanzhe
username: timestamp=2021-09-14T14:47:02.026, value=yuanzhen
1 row(s)
Took 0.0189 seconds
## 查询版本【可以查询少于设置的版本数】
hbase:057:0> get 'teacher', '1', { COLUMN=> 'username', VERSIONS=> 4 }
COLUMN CELL
username: timestamp=2021-09-14T14:47:03.841, value=yuan
username: timestamp=2021-09-14T14:47:02.285, value=yuanz
username: timestamp=2021-09-14T14:47:02.154, value=yuanzh
username: timestamp=2021-09-14T14:47:02.103, value=yuanzhe
1 row(s)
Took 0.3921 seconds
6.总结
跟Redis的shell命令类似,这些命令我们并不会经常使用,我们用的是HBase Java API或SQL中间层Phoenix。