操作HBase方式有:原生HBase API、Spring's HBase API、Phoenix、Spark等。
本文围绕Spring's HBase API展开,Maven坐标如下所示,
<dependency>
    <groupId>org.springframework.data</groupId>
    <artifactId>spring-data-hadoop-hbase</artifactId>
    <version>2.5.0.RELEASE</version>
</dependency>
1、HbaseAccessor抽象类,HBase访问高层抽象
访问器提供HBase访问相关配置处理。实现InitializingBean接口,属性加载后校验配置是否为空,并再次尝试获取编码字符集。
rpublic abstract class HbaseAccessor implements InitializingBean {
// 编码类型 private String encoding;
// 指定编码字符集 private Charset charset = HbaseUtils.getCharset(encoding);
// 创建及释放HBase表工厂接口 private HTableInterfaceFactory tableFactory;
// Hadoop配置信息 private Configuration configuration;
@Override
public void afterPropertiesSet() {
Assert.notNull(configuration, " a valid configuration is required");
// detect charsetcharset = HbaseUtils.getCharset(encoding);
}
// setter & getter ...
public void setTableFactory(HTableInterfaceFactory tableFactory) {
this.tableFactory = tableFactory;
}
...
}
HbaseAccessor抽象类是HbaseInterceptor(拦截器)和HbaseTemplate(模板)的抽象父类。具体继承层次如图1所示。
图1,HbaseAccessor类继承层次图
2、HbaseOperations接口,HBase CRUD操作抽象
封装CRUD操作命令,具体实现延迟到HbaseTemplate实现。定义回调接口规范,让子类模板模式的使用更加灵活。其中execute方法是其它方法的基础实现。
public interface HbaseOperations {
// CRUD操作, 具体操作可以延迟到HBaseTemplate调用时决定. 该接口其它方法内部亦使用该方法. T execute(String tableName, TableCallback action);
// retrieve T find(String tableName, String family, final ResultsExtractor action);
T find(String tableName, String family, String qualifier, final ResultsExtractor action);
T find(String tableName, final Scan scan, final ResultsExtractor action);
List find(String tableName, String family, final RowMapper action);
List find(String tableName, String family, String qualifier, final RowMapper action);
List find(String tableName, final Scan scan, final RowMapper action);
T get(String tableName, String rowName, final RowMapper mapper);
T get(String tableName, String rowName, String familyName, final RowMapper mapper);
T get(String tableName, final String rowName, final String familyName, final String qualifier, final RowMapper mapper);
// 创建或更新单行单列数据 void put(String tableName, final String rowName, final String familyName, final String qualifier, final byte[] data);
// 删单行列族内所有列 void delete(String tableName, final String rowName, final String familyName);
// 删单行单列 void delete(String tableName, final String rowName, final String familyName, final String qualifier);
}
3、TableCallback接口,定义操作回调规范
回调接口定义对HBase表操作的回调规范。
public interface TableCallback {
/*** 表内部进行操作* @param table 表实体* @return T 操作结果*/
T doInTable(HTableInterface table) throws Throwable;
}
其中HTableInterface接口继承关系如图2所示,图2,HTableInterface接口继承关系
具体操作可参考以下范例代码(经过生产环境验证),
/**
 * Writes every non-null entry of {@code cellData} as one column of row
 * {@code rowKey} in the configured column family, stamped with
 * {@code timestamp}.
 *
 * @param rowKey    row key of the row to write
 * @param cellData  qualifier-to-value map; entries with a null value are skipped
 * @param timestamp version timestamp applied to every written cell
 * @return an Optional holding TRUE when the batch put succeeded, FALSE when it
 *         failed with an IOException
 */
// NOTE(review): the generic arguments were lost in the published snippet;
// Map<String, Object> is assumed from the Bytes.toBytes(key) /
// ByteUtils.toByteArray(value) calls. Confirm against the declaring interface.
@Override
public Optional<Boolean> insert(final String rowKey, final Map<String, Object> cellData, long timestamp) {
    logger.warn("[更新数据] 总共需要保存{}列数据", cellData.size());
    final List<Put> putList = new ArrayList<>(256);
    return Optional.of(
        hbaseTemplate.execute(_table, new TableCallback<Boolean>() {
            @Override
            public Boolean doInTable(final HTableInterface table) throws Throwable {
                // One Put per non-null column, all targeting the same row and timestamp.
                cellData.entrySet()
                        .stream()
                        .filter(entry -> Objects.nonNull(entry.getValue()))
                        .forEachOrdered(entry -> {
                            final Put put = new Put(Bytes.toBytes(rowKey), timestamp);
                            put.addColumn(Bytes.toBytes(_columnFamily),
                                    Bytes.toBytes(entry.getKey()), ByteUtils.toByteArray(entry.getValue()));
                            putList.add(put);
                        });
                logger.warn("[更新数据] 数据开始入库...");
                long startTime = System.currentTimeMillis();
                try {
                    table.put(putList);
                    logger.warn("[更新数据] 更新结束,共计入库(内存){}列,耗时{}毫秒",
                            putList.size(), System.currentTimeMillis() - startTime);
                    return Boolean.TRUE;
                } catch (final IOException e) {
                    // Pass the exception as the trailing argument so the stack trace is logged too.
                    logger.error("[更新数据] 更新带版本号列失败. e = {}", e.getMessage(), e);
                    return Boolean.FALSE;
                } finally {
                    putList.clear();
                }
            }
        })
    );
}
4、ResultsExtractor接口,结果集抽取器抽象
该接口定义了从结果集扫描器中抽取结果的操作规范,用户必须处理所有结果集数据。
/**
 * Contract for extracting data from a result scanner.
 *
 * @param <T> type of the extracted result
 */
public interface ResultsExtractor<T> {

    /**
     * Extracts data from the given result scanner.
     *
     * @param results scanner over the result set to extract from
     * @return the extracted result; a single row for a Get, generally
     *         several rows for a Scan
     * @throws Exception if extraction fails
     */
    T extractData(ResultScanner results) throws Exception;
}
结果扫描器接口,提供获取下一行结果、获取后连续N行结果、关闭扫描器功能。
/**
 * Scanner over query results: fetch the next row, fetch the next N rows,
 * or close the scanner. Iterating yields one Result per row.
 */
public interface ResultScanner extends Closeable, Iterable<Result> {

    /** Returns the next row. */
    Result next() throws IOException;

    /** Returns the next {@code nbRows} consecutive rows. */
    Result[] next(int nbRows) throws IOException;

    /** Closes the scanner. */
    @Override
    void close();
}
5、RowMapperResultsExtractor类,结果集抽取器实现,基于行映射。
该类包含两部分,其一,具有RowMapper功能;其二,具有ResultScanner功能。前者为后者服务,即单行映射操作是从结果扫描器多行抽取的基础。
class RowMapperResultsExtractor implements ResultsExtractor> {
// 单行映射器 private final RowMapper rowMapper;
public RowMapperResultsExtractor(RowMapper rowMapper) {
Assert.notNull(rowMapper, "RowMapper is required");
this.rowMapper = rowMapper;
}
public List extractData(ResultScanner results) throws Exception {
List rs = new ArrayList();
int rowNum = 0;
for (Result result : results) {
// 多行操作本质上也是单行操作的累加, 因此需要内部维护RowMapper rs.add(this.rowMapper.mapRow(result, rowNum++));
}
return rs;
}
}
结果集抽取器接口与其实现基于行映射的结果集抽取器关系如图3所示,图3,结果集采集器接口及其实现
6、RowMapper,单行映射器
该接口仅有一个抽象方法mapRow,实现将Get/Scan操作所获取数据结果集中的指定位置结果行映射成实体类。
public interface RowMapper {
/*** @param result Single row result of Get or Scan query* @param rowNum Row number* @return Entity Class (Java Bean)*/
T mapRow(Result result, int rowNum) throws Exception;
}
笔者在项目实践中采用如下写法。可以将RowMapper视为查询结果过滤函数;由于RowMapper是只包含一个抽象方法的接口,原本以匿名内部类形式传入的实现,在JDK1.8+中可以改写成Lambda表达式。
/**
 * Reads the latest cell of one column of one row and wraps it in a
 * ColumnTimestampPair.
 *
 * @param rowKey    row key to read
 * @param qualifier column qualifier inside the configured column family
 * @return the pair built from the latest cell, or an empty Optional when the
 *         lookup throws
 */
@Override
public Optional<ColumnTimestampPair> findOneRowOneColumn(final String rowKey, final String qualifier) {
    try {
        return hbaseTemplate.get(_table, rowKey, _columnFamily, (result, rowNum) -> {
            final Cell columnLatestCell = result.getColumnLatestCell(
                    Bytes.toBytes(_columnFamily), Bytes.toBytes(qualifier));
            // NOTE(review): getColumnLatestCell may yield null for a missing column;
            // whether buildColumnTimestampPair tolerates that is not visible here — confirm.
            final ColumnTimestampPair columnTimestampPair = this.buildColumnTimestampPair(columnLatestCell);
            columnTimestampPair.setQualifier(qualifier);
            return Optional.of(columnTimestampPair);
        });
    } catch (final Exception e) {
        // Message fills the placeholder; the trailing throwable keeps the stack trace.
        logger.error("[查询数据] 查询单列数据失败, e = {}", e.getMessage(), e);
        return Optional.empty();
    }
}
原生API使用Result类定义单行结果,内部细节如下:
public class Result implements CellScannable, CellScanner {
// 结果行中单元格数组 private Cell[] cells;
// 标识查询是否仅是校验单元格存在性 private Boolean exists;
// 标识是否是陈旧值,默认不是 private boolean stale = false;
// 标识是否包含行完整的单元格,默认包含完整单元格 private boolean partial = false;
// 缓存,表明未使用Java序列化 private transient byte [] row = null;
private transient NavigableMap>> familyMap = null;
// 本地缓冲区 private static ThreadLocal localBuffer = new ThreadLocal();
// 缓冲区未利用长度,默认为128B private static final int PAD_WIDTH = 128;
// 只读的空结果行 public static final Result EMPTY_RESULT = new Result(true);
// 初始化的CellScanner索引值,默认-1 private final static int INITIAL_CELLSCANNER_INDEX = -1;
...
}
Result类层次如下图2所示。
图2,Result类结构层级
单元格是否可扫描接口,实现该接口并调用单元格扫描器方法后可获取单元格扫描器。
/**
 * Implemented by types that can hand out a CellScanner over their cells.
 */
public interface CellScannable {

    /** Returns a scanner over the cells. */
    CellScanner cellScanner();
}
单元格扫描器接口,定义获取扫描器当前指向单元格以及判断后续单元格是否存在两个方法。
/**
 * Cursor over a sequence of cells: move forward with {@link #advance()},
 * read the cell under the cursor with {@link #current()}.
 */
public interface CellScanner {

    /**
     * Returns the cell the scanner currently points at. Generally used after
     * {@link #advance()} has returned true.
     */
    Cell current();

    /**
     * Moves the scanner forward by one cell.
     *
     * @return whether a cell exists one position after the current one
     * @throws IOException if advancing fails
     */
    boolean advance() throws IOException;
}
7、HbaseTemplate,Spring对HBase访问支持核心API
直接支持增删改查简单操作,间接支持批量插入、指定时间戳插入等功能。
u淫ic class HbaseTempla
te extends HbaseAccessor implements HbaseOperations {
// 是否自动提交
private boolean autoFlush = true;
public HbaseTemplate() {
}
/** 基于配置初始化HbaseTemplate */
public HbaseTemplate(Configuration configuration) {
setConfiguration(configuration);
afterPropertiesSet();
}
/**
* 一般用于批量操作,简单操作已经完备
*/
@Override
public T execute(String tableName, TableCallback action) {
Assert.notNull(action, "Callback object must not be null");
Assert.notNull(tableName, "No table specified");
HTableInterface table = getTable(tableName);
try {
boolean previousFlushSetting = applyFlushSetting(ta
ble);
T result = action.doInTable(table);
flushIfNecessary(table, previousFlushSetting);
return result;
} catch (Throwable th) {
if (th instanceof Error) {
throw ((Error) th);
}
if (th instanceof RuntimeException) {
throw ((RuntimeException) th);
}
throw convertHbaseAccessException((Exception) th);
} finally {
releaseTable(tableName, table);
}
}
private HTableInterface getTable(String tableName) {
return HbaseUtils.getHTable(tableName, getConfiguration(), getCharset(), getTableFactory());
}
private void releaseTable(String tableName, HTableInterface table) {
HbaseUtils.releaseTable(tableName, table, getTableFactory());
}
@SuppressWarnings("deprecation")
private boolean applyFlushSetting(HTableInterface table) {
boolean autoFlush = table.isAutoFlush();
if (table instanceof HTable) {
((HTable) table).setAutoFlush(this.autoFlush);
}
return autoFlush;
}
@SuppressWarnings("deprecation")
private void restoreFlushSettings(HTableInterface table, boolean oldFlush) {
if (table instanceof HTable) {
if (table.isAutoFlush() != oldFlush) {
((HTable) table).setAutoFlush(oldFlush);
}
}
}
private void flushIfNecessary(HTableInterface table, boolean oldFlush) throws IOException {
// TODO: check whether we can consider or not a table scope
// 刷新提交
table.flushCommits();
// 重新存储刷新设置
restoreFlushSettings(table, oldFlush);
}
public DataAccessException convertHbaseAccessException(Exception ex) {
return HbaseUtils.convertHbaseException(ex);
}
@Override
public T find(String tableName, String family, final ResultsExtractor action) {
Scan scan = new Scan();
scan.addFamily(family.getBytes(getCharset()));
return find(tableName, scan, action);
}
@Override
public T find(String tableName, String family, String qualifier, final ResultsExtractor action) {
Scan scan = new Scan();
scan.addColumn(family.getBytes(getCharset()), qualifier.getBytes(getCharset()));
return find(tableName, scan, action);
}
@Override
public T find(String tableName, final Scan scan, final ResultsExtractor action) {
return execute(tableName, new TableCallback() {
@Override
public T doInTable(HTableInterface htable) throws Throwable {
ResultScanner scanner = htable.getScanner(scan);
try {
return action.extractData(scanner);
} finally {
scanner.close();
}
}
});
}
@Override
public List find(String tableName, String family, final RowMapper action) {
Scan scan = new Scan();
scan.addFamily(family.getBytes(getCharset()));
return find(tableName, scan, action);
}
@Override
public List find(String tableName, String family, String qualifier, final RowMapper action) {
Scan scan = new Scan();
scan.addColumn(family.getBytes(getCharset()), qualifier.getBytes(getCharset()));
return find(tableName, scan, action);
}
@Override
public List find(String tableName, final Scan scan, final RowMapper action) {
return find(tableName, scan, new RowMapperResultsExtractor(action));
}
@Override
public T get(String tableName, String rowName, final RowMapper mapper) {
return get(tableName, rowName, null, null, mapper);
}
@Override
public T get(String tableName, String rowName, String familyName, final RowMapper mapper) {
return get(tableName, rowName, familyName, null, mapper);
}
@Override
public T get(String tableName, final String rowName, final String familyName, final String qualifier, final RowMapper mapper) {
return execute(tableName, new TableCallback() {
@Override
public T doInTable(HTableInterface htable) throws Throwable {
Get get = new Get(rowName.getBytes(getCharset()));
if (familyName != null) {
byte[] family = familyName.getBytes(getCharset());
if (qualifier != null) {
get.addColumn(family, qualifier.getBytes(getCharset()));
} else {
get.addFamily(family);
}
}
Result result = htable.get(get);
return mapper.mapRow(result, 0);
}
});
}
@Override
public void put(String tableName, final String rowName, final String familyName, final String qualifier, final byte[] value) {
Assert.hasLength(rowName);
Assert.hasLength(familyName);
Assert.hasLength(qualifier);
Assert.notNull(value);
execute(tableName, new TableCallback() {
@Override
public Object doInTable(HTableInterface htable) throws Throwable {
Put put = new Put(rowName.getBytes(getCharset())).add(familyName.getBytes(getCharset()), qualifier.getBytes(getCharset()), value);
htable.put(put);
return null;
}
});
}
@Override
public void delete(String tableName, final String rowName, final String familyName) {
delete(tableName, rowName, familyName, null);
}
@Override
public void delete(String tableName, final String rowName, final String familyName, final String qualifier) {
Assert.hasLength(rowName);
Assert.hasLength(familyName);
execute(tableName, new TableCallback() {
@Override
public Object doInTable(HTableInterface htable) throws Throwable {
Delete delete = new Delete(rowName.getBytes(getCharset()));
byte[] family = familyName.getBytes(getCharset());
if (qualifier != null) {
delete.deleteColumn(family, qualifier.getBytes(getCharset()));
} else {
delete.deleteFamily(family);
}
htable.delete(delete);
return null;
}
});
}
/**
* 设置自动刷新
* @param autoFlush 是否自动刷新
*/
public void setAutoFlush(boolean autoFlush) {
this.autoFlush = autoFlush;
}
}
HbaseTemplate继承层次如图3所示,图3,HbaseTemplate继承层次
8、HbaseConfigurationFactoryBean类,配置FactoryBean
public class HbaseConfigurationFactoryBean
implements InitializingBean, DisposableBean, FactoryBean {
// 是否允许删除连接,默认true
private boolean deleteConnection = true;
// HBase配置
private Configuration configuration;
// Hadoop配置
private Configuration hadoopConfig;
private Properties properties;
// 选举法定人数(ZK集群地址)
private String quorum;
// 端口
private Integer port;
public void setDeleteConnection(boolean deleteConnection) {
this.deleteConnection = deleteConnection;
}
public void setStopProxy(boolean stopProxy) {
log.warn("Use of 'stopProxy' has been deprecated");
}
public void setConfiguration(Configuration configuration) {
this.hadoopConfig = configuration;
}
@SuppressWarnings("deprecation")
public void destroy() {
if (deleteConnection) {
// HBase连接管理器, 如果允许删除则有管理器删除
HConnectionManager.deleteConnection(getObject());
}
}
public void setProperties(Properties properties) {
this.properties = properties;
}
public void afterPropertiesSet() {
// 如果Hadoop配置存在,则使用Hadoop配置构建HBase
configuration = (hadoopConfig != null ?
HBaseConfiguration.create(hadoopConfig) : HBaseConfiguration.create());
// 动态添加配置属性
ConfigurationUtils.addProperties(configuration, properties);
// set host and port last to override any other properties
if (StringUtils.hasText(quorum)) {
configuration.set(HConstants.ZOOKEEPER_QUORUM, quorum.trim());
}
if (port != null) {
configuration.set(HConstants.ZOOKEEPER_CLIENT_PORT, port.toString());
}
}
public Configuration getObject() {
return configuration;
}
public Class extends Configuration> getObjectType() {
return (configuration != null ?
configuration.getClass() : Configuration.class);
}
public boolean isSingleton() {
return true;
}
public void setZkQuorum(String quorum) {
this.quorum = quorum;
}
public void setZkPort(Integer port) {
this.port = port;
}
}
9、其它相关类
以下三个相关类相较于上述核心类重要性相对要次一级,对核心流程影响不大,因此不再详述。
HbaseSynchronizationManager,HBase同步管理器,实现表绑定/解绑资源、获取资源、获取表名等信息;
HbaseUtils,HBase工具类,实现异常转换、释放连接、获取表信息、判定表是否绑定线程;
HbaseSystemException,HBase系统异常