HBase Version: hbase-0.90.3-cdh3u1
org.apache.hadoop.hbase.client.HTablePool
用起来不是很方便, 所以重写了一下HTablePool, 改动和自己的业务逻辑比较相关. 欢迎讨论.
主要是对源代码下面4点进行改进和设置:
1. 为不同的table建立的poolSize不一样, 目前HTablePool为所有的table建立的maxSize一致.
private final int maxSize;
2. 从HTablePool中getTable时只初始化了一个HTable, 而且在这个时候才初始化该table对应的HTable Queue.
这个点不是很好, 也是我想修改源码的出发点.
/**
 * Get a reference to the specified table from the pool.<p>
 *
 * Create a new one if one is not available. The queue for a table is only
 * registered on the first call for that table name, and in that case the
 * queue is left empty: the caller receives a freshly created table.
 *
 * @param tableName table name
 * @return a reference to the specified table
 * @throws RuntimeException if there is a problem instantiating the HTable
 */
public HTableInterface getTable(String tableName) {
LinkedList<HTableInterface> queue = tables.get(tableName);
if(queue == null) {
// First request for this table name: register an empty queue lazily and
// hand out a newly created table. The result of putIfAbsent is not needed
// here because the local queue is not used again before returning.
queue = new LinkedList<HTableInterface>();
tables.putIfAbsent(tableName, queue);
return createHTable(tableName);
}
HTableInterface table;
// The queue is guarded by its own monitor; poll() yields null when it is empty.
synchronized(queue) {
table = queue.poll();
}
if(table == null) {
// Nothing pooled at the moment: create a new table instead of waiting.
return createHTable(tableName);
}
return table;
}
3. 应该有一个createTablePool的方法, 便于用户自己创建HTablePool. 此方法与closeTablePool相互呼应: 先创建, 用完后再关闭.
/**
 * Releases every table currently held in the queue of the given table name
 * and then deletes the connection associated with this pool's configuration.
 *
 * @param tableName name of the table whose pooled instances are released
 */
public void closeTablePool(final String tableName) {
// NOTE(review): if no queue was ever registered for tableName, queue is null
// and the synchronized statement below would throw NullPointerException -
// confirm that callers only pass names that have been pooled.
Queue<HTableInterface> queue = tables.get(tableName);
synchronized (queue) {
// Drain the queue, handing each table to the factory for release.
HTableInterface table = queue.poll();
while (table != null) {
this.tableFactory.releaseHTableInterface(table);
table = queue.poll();
}
}
// NOTE(review): the second argument is presumably the stopProxy flag of
// HConnectionManager.deleteConnection - confirm against the HBase Javadoc.
HConnectionManager.deleteConnection(this.config, true);
}
4. HTable的Put可以做批量优化: 关闭autoFlush, 让多个Put先缓存, 再一起通过flushCommits()提交. 做法是循环pool中的HTable, 依次调用flushCommits().
贴上修改后的源码:
package org.apache.hadoop.hbase.client;
import java.util.LinkedList;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * A rewritten pool of {@link HTableInterface} instances, derived from
 * {@code org.apache.hadoop.hbase.client.HTablePool}.<p>
 *
 * The pool keeps one queue of table instances per table name. To use it,
 * instantiate a MyHTablePool, fill the queue of a table with
 * {@link #createHTablePool(String, int, boolean)} and take an instance out with
 * {@link #getHTable(String)}. Once you are done with it, return it to the pool
 * with {@link #putHTableBack(HTableInterface)}.
 *
 * <p>The number of pooled instances is chosen per table when the queue is
 * filled. {@link #getHTable(String)} never creates an instance on demand: it
 * returns {@code null} when the queue of the table is currently empty.
 *
 * <p>Every queue is only read or modified while holding the monitor of that
 * queue.
 *
 * <p>All instances are created from a private copy of the supplied
 * configuration; closing a pool calls
 * {@code HConnectionManager.deleteConnection} for that copy.
 *
 * @author greatwqs
 * @update 2012-08-25
 */
public class MyHTablePool {
    /**
     * Pool size constant offered to callers of
     * {@link #createHTablePool(String, int, boolean)}; this class itself never
     * reads it.
     */
    public final static int DEFAULT_POOL_SIZE = 4;

    /**
     * Pooled instances keyed by table name. Each table has its own queue, so
     * different tables may hold a different number of instances.
     */
    private final ConcurrentMap<String, LinkedList<HTableInterface>> tables
            = new ConcurrentHashMap<String, LinkedList<HTableInterface>>();

    /** Private copy of the configuration used to create every table instance. */
    private final Configuration config;

    /** Factory used to create and to release table instances. */
    private final HTableInterfaceFactory tableFactory;

    /**
     * Default constructor; uses {@code HBaseConfiguration.create()} as the
     * configuration.
     */
    public MyHTablePool() {
        this(HBaseConfiguration.create());
    }

    /**
     * Constructor that uses the specified configuration and the default
     * table factory.
     *
     * @param config configuration; may be {@code null}
     */
    public MyHTablePool(final Configuration config) {
        this(config, null);
    }

    /**
     * Constructor that uses the specified configuration and table factory.
     *
     * @param config configuration to copy; a fresh {@link Configuration} is
     *        used when {@code null}
     * @param tableFactory factory for table instances; an {@link HTableFactory}
     *        is used when {@code null}
     */
    public MyHTablePool(final Configuration config, final HTableInterfaceFactory tableFactory) {
        // Make a new configuration instance so I can safely cleanup when
        // done with the pool.
        if (config == null) {
            this.config = new Configuration();
        } else {
            this.config = new Configuration(config);
        }
        if (tableFactory == null) {
            this.tableFactory = new HTableFactory();
        } else {
            this.tableFactory = tableFactory;
        }
    }

    /**
     * Fills the queue of the given table until it holds {@code maxSize}
     * instances. Does nothing when the queue already holds that many.
     *
     * @param tableName name of the table to pool
     * @param maxSize number of instances the queue should hold afterwards
     * @param isAutoFlush auto-flush setting applied to every created instance
     *        that is an {@link HTable}; instances of any other type keep the
     *        setting their factory gave them
     */
    public void createHTablePool(final String tableName, final int maxSize, boolean isAutoFlush) {
        LinkedList<HTableInterface> queue = tables.get(tableName);
        if (queue == null) {
            LinkedList<HTableInterface> fresh = new LinkedList<HTableInterface>();
            // putIfAbsent returns the queue another thread registered first, if
            // any; that one must be filled, otherwise the new instances would
            // land in a queue nobody can reach.
            queue = tables.putIfAbsent(tableName, fresh);
            if (queue == null) {
                queue = fresh;
            }
        }
        synchronized (queue) {
            int missing = maxSize - queue.size();
            for (int i = 0; i < missing; i++) {
                HTableInterface table = createHTable(tableName);
                if (table == null) {
                    continue;
                }
                // Only HTable is known to expose setAutoFlush; a custom factory
                // may hand back another implementation, which is pooled as is.
                if (table instanceof HTable) {
                    ((HTable) table).setAutoFlush(isAutoFlush);
                }
                queue.add(table);
            }
        }
    }

    /**
     * Fills the queue of every given table, see
     * {@link #createHTablePool(String, int, boolean)}.
     *
     * @param tableNameArray names of the tables to pool
     * @param maxSize number of instances each queue should hold afterwards
     * @param isAutoFlush auto-flush setting for the created instances.
     *        Usage example:
     *        {@code false} for {@link Put}: puts are buffered and you call
     *        flushCommits later, e.g. from a thread that periodically loops
     *        over the pooled tables;
     *        {@code true} for {@link Scan} and {@link Delete}.
     */
    public void createHTablePool(final String[] tableNameArray, final int maxSize, boolean isAutoFlush) {
        for (String tableName : tableNameArray) {
            createHTablePool(tableName, maxSize, isAutoFlush);
        }
    }

    /**
     * Fills the queue of every given table with auto-flush set to
     * {@code false}, see {@link #createHTablePool(String[], int, boolean)}.
     *
     * @param tableNameArray names of the tables to pool
     * @param maxSize number of instances each queue should hold afterwards
     */
    public void createHTablePool(final String[] tableNameArray, final int maxSize) {
        createHTablePool(tableNameArray, maxSize, false);
    }

    /**
     * Takes an instance of the specified table out of the pool.<p>
     *
     * No instance is created here: the queue must have been filled with
     * {@link #createHTablePool(String, int, boolean)} beforehand.
     *
     * @param tableName table name
     * @return an instance of the specified table, or {@code null} when the
     *         queue of that table is currently empty
     * @throws RuntimeException if no queue exists for the table
     */
    public HTableInterface getHTable(String tableName) {
        LinkedList<HTableInterface> queue = tables.get(tableName);
        if (queue == null) {
            throw new RuntimeException("There is no pool for the HTable");
        }
        synchronized (queue) {
            return queue.poll();
        }
    }

    /**
     * Takes an instance of the specified table out of the pool, see
     * {@link #getHTable(String)}.
     *
     * @param tableName table name
     * @return an instance of the specified table, or {@code null} when the
     *         queue of that table is currently empty
     * @throws RuntimeException if no queue exists for the table
     */
    public HTableInterface getHTable(byte[] tableName) {
        return getHTable(Bytes.toString(tableName));
    }

    /**
     * Puts the specified table instance back into the pool.
     * <p>
     *
     * A {@code null} argument is ignored. An instance whose table has no queue
     * in this pool cannot be retained and is handed to the factory for release
     * instead.
     *
     * @param table instance to return
     */
    public void putHTableBack(HTableInterface table) {
        if (table == null) {
            return;
        }
        LinkedList<HTableInterface> queue = tables.get(Bytes.toString(table.getTableName()));
        if (queue == null) {
            // No queue to return it to; release it rather than fail or leak it.
            this.tableFactory.releaseHTableInterface(table);
            return;
        }
        synchronized (queue) {
            queue.add(table);
        }
    }

    /**
     * Creates a new instance of the given table through the table factory,
     * using this pool's configuration.
     *
     * @param tableName table name
     * @return the instance returned by the factory
     */
    protected HTableInterface createHTable(String tableName) {
        return this.tableFactory.createHTableInterface(config, Bytes.toBytes(tableName));
    }

    /**
     * Releases all instances currently pooled for the given table and deletes
     * the connection of this pool's configuration.
     * <p>
     * Note: this is a 'shutdown' of the given table pool and different from
     * {@link #putHTableBack(HTableInterface)}, that is used to return the table
     * instance to the pool for future re-use. Nothing happens when the table
     * has no queue in this pool.
     * <p>
     * NOTE(review): the connection is looked up by this pool's configuration,
     * so it is presumably shared by the instances of every other table created
     * by this pool; prefer {@link #closeHTablePool()} for a full shutdown.
     *
     * @param tableName table name
     */
    public void closeHTablePool(final String tableName) {
        Queue<HTableInterface> queue = tables.get(tableName);
        if (queue == null) {
            return;
        }
        releaseAll(queue);
        HConnectionManager.deleteConnection(this.config, true);
    }

    /**
     * See {@link #closeHTablePool(String)}.
     *
     * @param tableName table name
     */
    public void closeHTablePool(final byte[] tableName) {
        closeHTablePool(Bytes.toString(tableName));
    }

    /**
     * Releases the pooled instances of every table and then deletes the
     * connection of this pool's configuration once. Nothing happens when no
     * table has ever been pooled.
     */
    public void closeHTablePool() {
        if (tables.isEmpty()) {
            return;
        }
        // Release every instance before the connection goes away, so that no
        // instance is released after its connection has been deleted.
        for (Queue<HTableInterface> queue : tables.values()) {
            releaseAll(queue);
        }
        HConnectionManager.deleteConnection(this.config, true);
    }

    /**
     * Returns the number of instances currently pooled for the given table.
     *
     * @param tableName table name
     * @return current queue size, or 0 when the table has no queue in this pool
     */
    public int getCurrentPoolSize(String tableName) {
        Queue<HTableInterface> queue = tables.get(tableName);
        if (queue == null) {
            return 0;
        }
        synchronized (queue) {
            return queue.size();
        }
    }

    /** Drains the queue, handing every instance to the factory for release. */
    private void releaseAll(final Queue<HTableInterface> queue) {
        synchronized (queue) {
            HTableInterface table = queue.poll();
            while (table != null) {
                this.tableFactory.releaseHTableInterface(table);
                table = queue.poll();
            }
        }
    }
}
org.apache.hadoop.hbase.client.MyHTablePoolTest 测试实例
package org.apache.hadoop.hbase.client;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
/**
 * Manual smoke test for {@link MyHTablePool}: fills a pool for one table,
 * takes an instance out, prints every row of the table to the console,
 * returns the instance and closes the pool.
 *
 * @author greatwqs
 * @update 2012-08-25
 */
public class MyHTablePoolTest {
    /** Table that is pooled and scanned by this test. */
    private static final String TABLE_NAME = "DCP_DataCenter_Base";

    /** Charset name used to render the raw HBase bytes as text. */
    private static final String CHARSET = "UTF-8";

    /**
     * test method
     *
     * @param args unused
     * @throws Exception if the pool cannot be created or the scanner cannot be
     *         opened
     */
    public static void main(String[] args) throws Exception {
        // 1. my config file
        String configFile = "conf/hbase-site.xml";
        Configuration config = new Configuration();
        config.addResource(new Path(configFile));
        // 2. init HTablePool
        MyHTablePool myPool = new MyHTablePool(config);
        try {
            // 3. create HTablePool for a table
            myPool.createHTablePool(TABLE_NAME, MyHTablePool.DEFAULT_POOL_SIZE, false);
            // 4. get already exist HTable from HTablePool; only the
            // HTableInterface methods are needed, so no cast to HTable.
            HTableInterface table = myPool.getHTable(TABLE_NAME);
            if (table == null) {
                // The pool hands out null when it has no instance left; there
                // is nothing to scan with, so stop instead of dereferencing it.
                System.out.println("get HTable from HTablePool Failed!");
                return;
            }
            System.out.println("get HTable from HTablePool Success!");
            try {
                // 5. get all data from HTable, and print to console.
                printAllRows(table);
            } finally {
                // 6. after use HTable end, then put the HTable back to
                // HTablePool, even when the scan failed.
                myPool.putHTableBack(table);
            }
        } finally {
            // 7. close HTablePool
            myPool.closeHTablePool();
        }
    }

    /**
     * Scans the whole table and prints row key, column family, qualifier,
     * timestamp and value of every cell. Failures while iterating are printed
     * and do not abort the test; the scanner is always closed.
     *
     * @param table table to scan
     * @throws Exception if the scanner cannot be opened
     */
    private static void printAllRows(HTableInterface table) throws Exception {
        ResultScanner rs = table.getScanner(new Scan());
        try {
            for (Result result : rs) {
                KeyValue[] kv = result.raw();
                if (kv.length > 0) {
                    System.out.println("RowKey: " + new String(kv[0].getRow(), CHARSET));
                }
                for (KeyValue cell : kv) {
                    System.out.println("ColumnFamily: " + new String(cell.getFamily(), CHARSET));
                    System.out.println("Qualifier: " + new String(cell.getQualifier(), CHARSET));
                    System.out.println("Timestamp: " + cell.getTimestamp());
                    System.out.println("Value: " + new String(cell.getValue(), CHARSET));
                }
                System.out.println();
            }
        } catch (Exception e) {
            // Best-effort demo: report the failure and fall through to cleanup.
            e.printStackTrace();
        } finally {
            rs.close();
        }
    }
}