启动 hbase
bin/start-hbase.sh
启动 thrift 服务
bin/hbase-daemon.sh start thrift
连接 hbase 数据库
import happybase
connection = happybase.Connection(host='qq3', port=9090)
happybase.Connection(host='localhost', port=9090, timeout=None, autoconnect=True, table_prefix=None, table_prefix_separator=b'_', compat='0.98', transport='buffered', protocol='binary')
host:主机名
port:端口
timeout:超时时间
autoconnect:连接是否直接打开
table_prefix:用于构造表名的前缀
table_prefix_separator:用于table_prefix的分隔符
compat:兼容模式
transport:传输模式
protocol:协议
获取所有表名
table_name_list = connection.tables()
print(table_name_list)
[b'ConfigurationManagementGraph', b'actor', b'janusgraph', b'new_actor', b'person']
创建新表:
connection = happybase.Connection(host='qq3', port=9090)
families = {
'info': dict(),
'score': dict()
}
connection.create_table('students', families)
table_name_list = connection.tables()
print(table_name_list)
[b'ConfigurationManagementGraph', b'actor', b'janusgraph', b'new_actor', b'person', b'students']
禁用表
connection.disable_table(name)
启用表
connection.enable_table(name)
删除表
connection.delete_table(name,disable=False)
disable:是否先禁用:为True时可直接删除已启用的表
否则应该先禁用再删除
判断表是否被启用 返回一个bool值
connection.is_table_enabled(name)
获取表的所有列簇信息
connection = happybase.Connection(host='qq3', port=9090)
students_table = connection.table('students')
info = students_table.families()
print(info)
{b'info': {'name': b'info:', 'max_versions': 3, 'compression': b'NONE', 'in_memory': False, 'bloom_filter_type': b'NONE', 'bloom_filter_vector_size': 0, 'bloom_filter_nb_hashes': 0, 'block_cache_enabled': False, 'time_to_live': 2147483647}, b'score': {'name': b'score:', 'max_versions': 3, 'compression': b'NONE', 'in_memory': False, 'bloom_filter_type': b'NONE', 'bloom_filter_vector_size': 0, 'bloom_filter_nb_hashes': 0, 'block_cache_enabled': False, 'time_to_live': 2147483647}}
插入/修改数据
put(self, row, data, timestamp=None, wal=True)
向表中插入两条数据
connection = happybase.Connection(host='qq3', port=9090)
students_table = connection.table('students')
data_1 = {"info:name":'lucy',"info:age":"16","info:gender":"M","score:Math":"87","score:Chinese":"78","score:English":"80"}
data_2 = {"info:name":'jack',"info:age":"15","info:gender":"F","score:Math":"90","score:Chinese":"75","score:English":"78"}
students_table.put('2019030101',data_1)
students_table.put('2019030102',data_2)
查询单条数据 返回结果为字典
row(self, row, columns=None, timestamp=None, include_timestamp=False)
connection = happybase.Connection(host='qq3', port=9090)
students_table = connection.table('students')
data_1 = students_table.row('2019030101')
print(data_1)
{b'info:age': b'16', b'info:gender': b'M', b'info:name': b'lucy', b'score:Chinese': b'78', b'score:English': b'80', b'score:Math': b'87'}
查询多条数据 返回结果为列表
rows(self, rows, columns=None, timestamp=None,include_timestamp=False)
connection = happybase.Connection(host='qq3', port=9090)
students_table = connection.table('students')
data = students_table.rows(rows=['2019030101','2019030102'],columns=['info:name','score:English'])
print(data)
[(b'2019030101', {b'info:name': b'lucy', b'score:English': b'80'}), (b'2019030102', {b'info:name': b'jack', b'score:English': b'78'})]
scan 查看数据 scan生成的是一个迭代器,可用next()取数据,也可遍历取数据
scan(self, row_start=None, row_stop=None, row_prefix=None, columns=None, filter=None, timestamp=None, include_timestamp=False, batch_size=1000, scan_batching=None, limit=None, sorted_columns=False, reverse=False)
row_start:起始行,默认None,即第一行,可传入行号指定从哪一行开始
row_stop:结束行,默认None,即最后一行,可传入行号指定到哪一行结束(不获取此行数据)
row_prefix:行号前缀,默认为None,即不指定前缀扫描,可传入前缀来扫描符合此前缀的行
columns:列,默认为None,即获取所有列,可传入一个list或tuple来指定获取列
filter:过滤字符串
timestamp:时间戳。默认为None,即返回最大的那个时间戳的数据。可传入一个时间戳来获取小于此时间戳的最大时间戳的版本数据
include_timestamp:是否返回时间戳数据,默认为False
batch_size:用于检索结果的批量大小
scan_batching:服务端扫描批处理
limit:最多返回的行数,默认为None,即不限制
sorted_columns:是否返回排序的列(每行中的列按列名排序)
reverse:是否执行反向扫描
connection = happybase.Connection(host='qq3', port=9090)
students_table = connection.table('students')
res = students_table.scan()
print(next(res))
for i in res:
    print(i)
(b'2019030101', {b'info:age': b'16', b'info:gender': b'M', b'info:name': b'lucy', b'score:Chinese': b'78', b'score:English': b'80', b'score:Math': b'87'})
(b'2019030102', {b'info:age': b'15', b'info:gender': b'F', b'info:name': b'jack', b'score:Chinese': b'75', b'score:English': b'78', b'score:Math': b'90'})
批量插入数据
batch(self, timestamp=None, batch_size=None, transaction=False, wal=True)
connection = happybase.Connection(host='qq3', port=9090)
students_table = connection.table('students')
bat = students_table.batch()
bat.put('2019030103', {"info:name":'lili',"info:age":"15","info:gender":"F","score:Math":"82","score:Chinese":"73","score:English":"81"})
bat.put('2019030104', {"info:name":'rose',"info:age":"18","info:gender":"F","score:Math":"77","score:Chinese":"80","score:English":"88"})
bat.put('2019030105', {"info:name":'tom',"info:age":"16","info:gender":"M","score:Math":"87","score:Chinese":"73","score:English":"92"})
bat.send()
res = students_table.scan()
for i in res:
    print(i)
(b'2019030101', {b'info:age': b'16', b'info:gender': b'M', b'info:name': b'lucy', b'score:Chinese': b'78', b'score:English': b'80', b'score:Math': b'87'})
(b'2019030102', {b'info:age': b'15', b'info:gender': b'F', b'info:name': b'jack', b'score:Chinese': b'75', b'score:English': b'78', b'score:Math': b'90'})
(b'2019030103', {b'info:age': b'15', b'info:gender': b'F', b'info:name': b'lili', b'score:Chinese': b'73', b'score:English': b'81', b'score:Math': b'82'})
(b'2019030104', {b'info:age': b'18', b'info:gender': b'F', b'info:name': b'rose', b'score:Chinese': b'80', b'score:English': b'88', b'score:Math': b'77'})
(b'2019030105', {b'info:age': b'16', b'info:gender': b'M', b'info:name': b'tom', b'score:Chinese': b'73', b'score:English': b'92', b'score:Math': b'87'})
删除数据 可以整条删除 或者 删除某些column
delete(self, row, columns=None, timestamp=None, wal=True)
connection = happybase.Connection(host='qq3', port=9090)
students_table = connection.table('students')
students_table.delete('2019030101',['info:name','score:English'])
data_1 = students_table.row('2019030101')
print(data_1)
students_table.delete('2019030101')
data_1 = students_table.row('2019030101')
print(data_1)
{b'info:age': b'16', b'info:gender': b'M', b'score:Chinese': b'78', b'score:Math': b'87'}
{}
使用with来管理batch 写入或者删除
connection = happybase.Connection(host='qq3', port=9090)
students_table = connection.table('students')
with students_table.batch() as bat:
    bat.delete('2019030102')
    bat.delete('2019030103')
    bat.delete('2019030104')
    bat.put('2019030106',{"info:name":'tims',"info:age":"16","info:gender":"M","score:Math":"87","score:Chinese":"73","score:English":"92"})
res = students_table.scan()
for i in res:
    print(i)
(b'2019030105', {b'info:age': b'16', b'info:gender': b'M', b'info:name': b'tom', b'score:Chinese': b'73', b'score:English': b'92', b'score:Math': b'87'})
(b'2019030106', {b'info:age': b'16', b'info:gender': b'M', b'info:name': b'tims', b'score:Chinese': b'73', b'score:English': b'92', b'score:Math': b'87'})