HBase
1. 安装zookeeper
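[root@hadoopnode01 ~]# tar -zvxf /root/zookeeper-3.4.6.tar.gz -C /home/
# 解压得到的目录是 /home/zookeeper-3.4.6,后续步骤使用 /home/zookeeper,因此先重命名
[root@hadoopnode01 ~]# mv /home/zookeeper-3.4.6 /home/zookeeper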
[root@hadoopnode01 home]# cp /home/zookeeper/conf/zoo_sample.cfg /home/zookeeper/conf/zk.cfg
[root@hadoopnode01 conf]# mkdir /home/zk_data
[root@hadoopnode01 conf]# vi zk.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/home/zk_data
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
[root@hadoopnode01 home]# /home/zookeeper/bin/zkServer.sh start /home/zookeeper/conf/zk.cfg
[root@hadoopnode01 home]# jps
64436 QuorumPeerMain
64461 Jps
2. 安装hbase
2.1 解压
mkdir -p /home/hbase   # tar 的 -C 选项要求目标目录已存在
tar -zxf hbase-1.2.4-bin.tar.gz -C /home/hbase
#修改环境变量
2.2 修改配置文件
/home/hbase/hbase-1.2.4/conf/hbase-site.xml
<property>
<name>hbase.rootdir</name>
<value>hdfs://hadoopnode01:9000/hbase</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>hadoopnode01</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
2.3 添加regionservers
/home/hbase/hbase-1.2.4/conf/regionservers
hadoopnode01
3. 启动
start-hbase.sh(启动) | stop-hbase.sh(停止)
[root@hadoopnode01 ~]# jps
64849 HMaster
1489 DataNode
65073 Jps
64436 QuorumPeerMain
1668 SecondaryNameNode
62503 ResourceManager
64968 HRegionServer
62584 NodeManager
1401 NameNode
访问:`http://hadoopnode01:16010`,这是HBase中HMaster进程提供的Web UI页面,通过该页面可以查看HBase信息。
3.1 脚本连接
hbase shell
4. shell操作
4.1 常见命令
status, table_help, version, whoami
hbase(main):001:0> status
1 active master, 0 backup masters, 1 servers, 0 dead, 2.0000 average load
hbase(main):002:0> version
1.2.4, rUnknown, Wed Feb 15 18:58:00 CST 2017
hbase(main):003:0> whoami
root (auth:SIMPLE)
groups: root
4.2 关于命名空间
命名空间等同于MySQL中的数据库,在命名空间中有四个名词:
(1)tables(命名空间中的成员,可以理解为数据库中的表),如果没有指定,则在default默认的命名空间中。
(2)RegionServer group:一个命名空间包含了默认的RegionServer Group
(3)Permission:权限,命名空间能够让我们来定义访问控制列表ACL(Access Control List)
(4)Quota:限额,可以强制一个命名空间可包含的region的数量
alter_namespace, create_namespace, describe_namespace, drop_namespace, list_namespace, list_namespace_tables
#创建命名空间 描述
hbase(main):002:0> create_namespace 'GetLogs', {'user'=>'zhangsan'}
hbase(main):003:0> describe_namespace 'GetLogs'
DESCRIPTION
{NAME => 'GetLogs', user => 'zhangsan'}
#删除命名空间
drop_namespace 'GetLogs'
#查看所有命名空间
hbase(main):004:0> list_namespace
NAMESPACE
GetLogs
default
hbase
#增删改
Alter namespace properties.
To add/modify a property:
hbase> alter_namespace 'ns1', {METHOD => 'set', 'PROPERTY_NAME' => 'PROPERTY_VALUE'}
To delete a property:
hbase> alter_namespace 'ns1', {METHOD => 'unset', NAME=>'PROPERTY_NAME'}
#修改命名空间 增加属性 关键字set
hbase(main):009:0> alter_namespace 'GetLogs',{METHOD=>'set','user'=>'lisi'}
#删除user属性 关键字unset
hbase(main):016:0> alter_namespace 'GetLogs',{METHOD=>'unset','NAME'=>'user'}
4.3 数据定义语言 DDL (表)
alter, alter_async, alter_status, create, describe, disable, disable_all, drop, drop_all, enable, enable_all, exists, get_table, is_disabled, is_enabled, list, locate_region, show_filters
4.3.1创建表
#创建表
hbase(main):005:0> create 'GetLogs:test_response_request','response','request'
#查看表的描述
hbase(main):006:0> describe 'GetLogs:test_response_request'
Table GetLogs:test_response_request is ENABLED
GetLogs:test_response_request
COLUMN FAMILIES DESCRIPTION
{NAME => 'request', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_VERSIONS => '0', BLOCKCACH
E => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
{NAME => 'response', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_VERSIONS => '0', BLOCKCAC
HE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
#查看list
hbase(main):005:0> list
TABLE
GetLogs:test_response_request
=> ["GetLogs:test_response_request"]
hbase(main):007:0>
t = get_table 'GetLogs:test_response_request'
t.scan
4.3.2查看指定namespace的表
hbase(main):021:0> list_namespace_tables 'GetLogs'
TABLE
test_response_request
4.4 数据操纵语言DML (CRUD)
append, count, delete, deleteall, get, put, scan, truncate
4.4.1 put(插入|更新)
hbase(main):024:0> t=get_table "GetLogs:test_response_request"
# 插入数据行键RowKey为9989929391008468668008374520191204155654 列簇request:url 值为/member-client/member/card-info
hbase(main):002:0> t.put 9989929391008468668008374520191204155654 ,'request:url','/member-client/member/card-info'
或
hbase(main):002:0> put 'GetLogs:test_response_request', 9989929391008468668008374520191204155654,'request:url','/member-client/member/card-info'
4.4.2 get (取值)
hbase(main):005:0> t.get 9989929391008468668008374520191204155654
# 获取行键为9989929391008468668008374520191204155654 列为request:url 的数据,最多返回最近3个版本
hbase(main):007:0> t.get 9989929391008468668008374520191204155654 ,{COLUMNS=>'request:url',VERSIONS=>3}
# 获取行键为9989929391008468668008374520191204155654 列为request:url 时间戳在[1558973012353, 1558973281488)区间内、最多2个版本的数据 (时间戳区间查询,区间左闭右开);若只查询某一个时间戳,使用 TIMESTAMP=>1558973012353
hbase(main):020:0> t.get 9989929391008468668008374520191204155654 ,{COLUMNS=>'request:url',TIMERANGE=>[1558973012353,1558973281488],VERSIONS=>2}
4.4.3 delete/deleteall (删除)
# 删除行键为1 列族为cf1的数据(有可能会删除多个cell);删除前先查看该行最多3个版本的数据
hbase(main):034:0> t.get 1,{VERSIONS=>3,COLUMNS=>'cf1'}
COLUMN CELL
cf1:name timestamp=1547484605994, value=zs1
cf1:name timestamp=1547484378850, value=zs
# 删除该版本以及该版本以前的所有数据
hbase(main):035:0> t.delete 1 ,'cf1:name',1558973012353
0 row(s) in 0.0200 seconds
hbase(main):036:0> t.get 1,{VERSIONS=>3,COLUMNS=>'cf1'}
COLUMN CELL
0 row(s) in 0.0290 seconds
# 删除该rowkey对应的所有列
hbase(main):037:0> t.deleteall 1
0 row(s) in 0.0100 seconds
4.4.4 scan
hbase(main):058:0> scan 'GetLogs:test_response_request', {COLUMNS=>['request'],STARTROW => '3',LIMIT=>3,REVERSED=>true}
ROW COLUMN+CELL
2 column=cf1:name, timestamp=1547485439614, value=ls
1 column=cf1:age, timestamp=1547485477592, value=18
1 column=cf1:name, timestamp=1547485432058, value=zs
4.4.5 count
t.count
4.4.6 append (追加)
t.append 1,'cf1:name','110'
hbase(main):063:0> t.get 1
COLUMN CELL
cf1:age timestamp=1547485477592, value=18
cf1:name timestamp=1547486003061, value=zs110
4.4.7 truncate (清空表数据)
hbase(main):067:0> truncate 'GetLogs:test_response_request'