服务器环境Thrift安装
下载地址:http://mirrors.hust.edu.cn/apache/thrift/
tar -xzvf thrift-0.13.0.tar.gz
cd thrift-0.13.0
./configure --with-cpp --with-boost --with-python --without-csharp --with-java --without-erlang --without-perl --with-php --without-php_extension --without-ruby --without-haskell --without-go
make
make install
启动HBase Thrift服务
hbase-2.2.2$ bin/hbase-daemon.sh start thrift
Python happybase库安装
在线:
pip install thrift
pip install happybase
离线:
https://pypi.org/project/thrift/#files
thrift-0.13.0>python setup.py install
https://pypi.org/project/happybase/#files
happybase-1.2.0>python setup.py install
happybase操作示例
# -*- coding:utf-8 -*-
import sys
import happybase
# Python 2 only: make UTF-8 the interpreter-wide default string encoding.
# On Python 3 the default already is 'utf-8', so the branch below is skipped
# and the Python-2-only builtin reload() is never looked up.
default_encoding = 'utf-8'
if default_encoding != sys.getdefaultencoding():
    # Reloading sys makes setdefaultencoding() available again on Python 2.
    reload(sys)
    sys.setdefaultencoding(default_encoding)
class HBaseUtils(object):
    """Convenience wrapper around a ``happybase.ConnectionPool``.

    Each method borrows one connection from the pool for the duration of the
    call. Unless stated otherwise, a method catches any exception, prints it
    and returns ``None`` (best-effort behaviour).
    """

    def __init__(self, host, port, size):
        """Create the pool.

        :param host: host of the HBase Thrift server.
        :param port: port of the HBase Thrift server.
        :param size: number of connections kept in the pool.
        """
        self.pool = happybase.ConnectionPool(size=size, host=host, port=port)

    def create_table(self, table_name, families):
        """Create ``table_name`` with the given column families.

        :param families: mapping of family name to its options, e.g.
            ``{"f1": dict(), "f2": dict()}``.
        """
        try:
            with self.pool.connection() as connection:
                connection.create_table(table_name, families)
        except Exception as e:
            print(e)

    def update_table_able(self, table_name, able):
        """Enable or disable ``table_name`` if it is not already in that state.

        :param able: ``True`` to enable, ``False`` to disable. The checks use
            identity (``is True`` / ``is False``), so any other value leaves
            the table untouched.
        """
        try:
            with self.pool.connection() as connection:
                is_enabled = connection.is_table_enabled(table_name)
                if able is True and not is_enabled:
                    connection.enable_table(table_name)
                elif able is False and is_enabled:
                    connection.disable_table(table_name)
        except Exception as e:
            print(e)

    def delete_table(self, table_name, disable=False):
        """Delete ``table_name``; ``disable`` is forwarded to happybase."""
        try:
            with self.pool.connection() as connection:
                connection.delete_table(table_name, disable)
        except Exception as e:
            print(e)

    def read_table(self, table_name):
        """Return the happybase ``Table`` object for ``table_name``.

        Exceptions are not caught here.

        NOTE(review): the returned table is bound to a connection that has
        already been handed back to the pool when this method returns, so
        using it from several threads is presumably unsafe. Prefer the other
        methods of this class for actual reads and writes.
        """
        with self.pool.connection() as connection:
            return connection.table(table_name)

    def read_tables(self):
        """Return ``connection.tables()``. Exceptions are not caught here."""
        with self.pool.connection() as connection:
            return connection.tables()

    def insert(self, table_name, row, data, timestamp=None, wal=True):
        """Store ``data`` (column -> value mapping) under row key ``row``."""
        try:
            with self.pool.connection() as connection:
                connection.table(table_name).put(row, data, timestamp=timestamp, wal=wal)
        except Exception as e:
            print(e)

    def insert_batch(self, table_name, data_list, batch_size=1000):
        """Store many rows using happybase batches.

        :param data_list: iterable of dicts with the keys ``'row'`` and
            ``'data'`` and an optional ``'timestamp'``.
        :param batch_size: forwarded to ``Table.batch``.

        ``Batch.put`` takes ``(row, data, wal)`` and has no per-call
        timestamp; the timestamp is a property of the whole batch. Consecutive
        items sharing the same timestamp are therefore written through one
        batch created with that timestamp, which keeps the original write
        order and uses a single batch when no item carries a timestamp.
        """
        # Local import: the block must not rely on a new file-level import.
        from itertools import groupby

        try:
            with self.pool.connection() as connection:
                table = connection.table(table_name)
                runs = groupby(data_list, key=lambda item: item.get('timestamp'))
                for timestamp, items in runs:
                    with table.batch(timestamp=timestamp, batch_size=batch_size) as batch:
                        for item in items:
                            batch.put(item['row'], item['data'])
        except Exception as e:
            print(e)

    def delete(self, table_name, row, columns=None, timestamp=None, wal=True):
        """Delete ``row``, or only ``columns`` of it when given."""
        try:
            with self.pool.connection() as connection:
                connection.table(table_name).delete(row, columns=columns, timestamp=timestamp, wal=wal)
        except Exception as e:
            print(e)

    def read_row(self, table_name, row, columns=None, timestamp=None, include_timestamp=False):
        """Return the result of ``Table.row`` for a single row key."""
        try:
            with self.pool.connection() as connection:
                return connection.table(table_name).row(
                    row, columns=columns, timestamp=timestamp,
                    include_timestamp=include_timestamp)
        except Exception as e:
            print(e)

    def read_rows(self, table_name, rows, columns=None, timestamp=None, include_timestamp=False, need_dict=False):
        """Return the result of ``Table.rows`` for several row keys.

        :param need_dict: when true the result is passed through ``dict()``
            before being returned; otherwise it is returned unchanged.
        """
        try:
            with self.pool.connection() as connection:
                result = connection.table(table_name).rows(
                    rows, columns=columns, timestamp=timestamp,
                    include_timestamp=include_timestamp)
                return dict(result) if need_dict else result
        except Exception as e:
            print(e)

    def read_cells(self, table_name, row, column, versions=None, timestamp=None, include_timestamp=False):
        """Return the result of ``Table.cells`` for one column of one row."""
        try:
            with self.pool.connection() as connection:
                return connection.table(table_name).cells(
                    row, column, versions=versions, timestamp=timestamp,
                    include_timestamp=include_timestamp)
        except Exception as e:
            print(e)

    def read_families(self, table_name):
        """Return the result of ``Table.families`` for ``table_name``."""
        try:
            with self.pool.connection() as connection:
                return connection.table(table_name).families()
        except Exception as e:
            print(e)

    def read_regions(self, table_name):
        """Return the result of ``Table.regions`` for ``table_name``."""
        try:
            with self.pool.connection() as connection:
                return connection.table(table_name).regions()
        except Exception as e:
            print(e)

    def read_scan(self, table_name, row_start=None, row_stop=None, row_prefix=None, columns=None,
                  filter=None, timestamp=None, include_timestamp=False, batch_size=1000,
                  scan_batching=None, limit=None, sorted_columns=False, reverse=False):
        """Lazily iterate over the results of ``Table.scan``.

        All keyword arguments are forwarded to ``Table.scan`` unchanged.

        This method is itself a generator so that the pooled connection stays
        checked out while rows are being fetched. Returning the scan object
        from inside the ``with`` block would release the connection before
        the first row is read and would let scan errors bypass the handler
        below. Any exception, whether raised while acquiring the connection
        or while fetching rows, is printed and ends the iteration.
        """
        try:
            with self.pool.connection() as connection:
                scanner = connection.table(table_name).scan(
                    row_start=row_start, row_stop=row_stop, row_prefix=row_prefix,
                    columns=columns, filter=filter, timestamp=timestamp,
                    include_timestamp=include_timestamp, batch_size=batch_size,
                    scan_batching=scan_batching, limit=limit,
                    sorted_columns=sorted_columns, reverse=reverse)
                for item in scanner:
                    yield item
        except Exception as e:
            print(e)
# Demo: exercises every HBaseUtils method against a live HBase Thrift server.
# Single-argument print(...) calls are used so the script runs unchanged on
# both Python 2 and Python 3.
if __name__ == '__main__':
    hbase_utils = HBaseUtils(host="192.168.0.123", port=9090, size=5)

    # Table management.
    hbase_utils.create_table('user', {"basic": dict(), "profile": dict()})
    hbase_utils.update_table_able('user', True)
    table = hbase_utils.read_table('user')
    print(hbase_utils.read_tables())

    # Single-row and batched inserts.
    hbase_utils.insert('user', '000001'.encode(), {"basic:name": "zhangsan", "basic:gender": "male"})
    hbase_utils.insert('user', '000002'.encode(), {"basic:name": "lisi", "basic:gender": "female"})
    data_list = [
        {'row': '000003'.encode(), 'data': {"basic:name": "wangwu", "basic:gender": "male"}},
        {'row': '000004'.encode(), 'data': {"basic:name": "maliu", "basic:gender": "female"}}
    ]
    hbase_utils.insert_batch('user', data_list)

    # Point reads.
    print(hbase_utils.read_row('user', b'000001', include_timestamp=True))
    print(hbase_utils.read_row('user', b'000003', include_timestamp=False))
    print(hbase_utils.read_rows('user', [b'000001', b'000002'], include_timestamp=True))
    print(hbase_utils.read_rows('user', [b'000001', b'000002'], include_timestamp=True, need_dict=True))
    print(hbase_utils.read_rows('user', [b'000001', b'000002'], columns=['basic:name'], include_timestamp=True))
    print(hbase_utils.read_cells('user', b'000001', 'basic:name'))
    print(hbase_utils.read_cells('user', b'000001', 'basic:name', include_timestamp=True))

    # Table metadata.
    print(hbase_utils.read_families('user'))
    print(hbase_utils.read_regions('user'))

    # Insert a row, delete one column of it, then delete the whole row,
    # printing the row after each step.
    hbase_utils.insert('user', '000005'.encode(), {"basic:name": "shenqi", "basic:gender": "female"})
    print(hbase_utils.read_row('user', b'000005', include_timestamp=True))
    hbase_utils.delete('user', '000005'.encode(), ['basic:name'])
    print(hbase_utils.read_row('user', b'000005', include_timestamp=True))
    hbase_utils.delete('user', '000005'.encode())
    print(hbase_utils.read_row('user', b'000005', include_timestamp=True))

    # Range scan between two row keys.
    results = hbase_utils.read_scan('user', row_start=b'000001', row_stop=b'000003')
    for result in results:
        print(result)
No protocol version header 异常处理
先确认 HBase Thrift 服务是否已正常启动;若服务正常但仍报该异常,则将 hbase-site.xml 文件中 thrift 相关的配置注释掉,然后重启 Thrift 服务。