hbase的CoprocessorProtocol及一个简单的通用扩展实现

最新推荐文章于 2022-09-26 22:20:05 发布

xiao_jun_0820

最新推荐文章于 2022-09-26 22:20:05 发布

阅读量2.6k

点赞数

分类专栏： hbase

hbase 专栏收录该内容

27 篇文章 0 订阅

订阅专栏

原文：http://zhang-xzhi-xjtu.iteye.com/blog/1926732

hbase中的CoprocessorProtocol机制.

CoprocessorProtocol的原理比较简单，近似于一个mapreduce框架。由client将scan分解为面向多个region的请求，并行发送请求到多个region，然后client做一个reduce的操作，得到最后的结果。

先看一个例子，使用hbase的AggregationClient可以做到简单的面向单个column的统计。

    Java代码   
    
  
 @Test  
 public void testAggregationClient() throws Throwable {  
   
     LongColumnInterpreter columnInterpreter = new LongColumnInterpreter();  
   
     AggregationClient aggregationClient = new AggregationClient(  
             CommonConfig.getConfiguration());  
     Scan scan = new Scan();  
   
     scan.addColumn(ColumnFamilyName, QName1);  
   
     Long max = aggregationClient.max(TableNameBytes, columnInterpreter,  
             scan);  
     Assert.assertTrue(max.longValue() == 100);  
   
     Long min = aggregationClient.min(TableNameBytes, columnInterpreter,  
             scan);  
     Assert.assertTrue(min.longValue() == 20);  
   
     Long sum = aggregationClient.sum(TableNameBytes, columnInterpreter,  
             scan);  
     Assert.assertTrue(sum.longValue() == 120);  
   
     Long count = aggregationClient.rowCount(TableNameBytes,  
             columnInterpreter, scan);  
     Assert.assertTrue(count.longValue() == 4);  
   
 }  

看下hbase的源码。AggregateImplementation

    Java代码   
    
  
 @Override  
   public <T, S> T getMax(ColumnInterpreter<T, S> ci, Scan scan)  
       throws IOException {  
     T temp;  
     T max = null;  
     InternalScanner scanner = ((RegionCoprocessorEnvironment) getEnvironment())  
         .getRegion().getScanner(scan);  
     List<KeyValue> results = new ArrayList<KeyValue>();  
     byte[] colFamily = scan.getFamilies()[0];  
     byte[] qualifier = scan.getFamilyMap().get(colFamily).pollFirst();  
     // qualifier can be null.  
     try {  
       boolean hasMoreRows = false;  
       do {  
         hasMoreRows = scanner.next(results);  
         for (KeyValue kv : results) {  
           temp = ci.getValue(colFamily, qualifier, kv);  
           max = (max == null || (temp != null && ci.compare(temp, max) > 0)) ? temp : max;  
         }  
         results.clear();  
       } while (hasMoreRows);  
     } finally {  
       scanner.close();  
     }  
     log.info("Maximum from this region is "  
         + ((RegionCoprocessorEnvironment) getEnvironment()).getRegion()  
             .getRegionNameAsString() + ": " + max);  
     return max;  
   }  

这里由于

    Java代码   
    
  
 // Only the first column family registered on the scan is looked at.
 byte[] colFamily = scan.getFamilies()[0];
 // Only the first qualifier of that family is taken; pollFirst() also removes it from the set.
 byte[] qualifier = scan.getFamilyMap().get(colFamily).pollFirst();

所以，hbase自带的Aggregate函数，只能面向单列进行统计。

当我们想对多列进行Aggregate，并同时进行countRow时，有以下选择。
1 scan出所有的row，程序自己进行Aggregate和count。
2 使用AggregationClient，调用多次，得到所有的结果。由于多次调用，有一致性问题。
3 自己扩展CoprocessorProtocol。

首先我们可以写一个protocol的通用框架。
定义protocol接口。

    Java代码   
    
 public interface MyCoprocessorProtocol extends CoprocessorProtocol {  
   
     public static final long VERSION = 1L;  
   
     public <T> T handle(RowHandler<T> rowHandler, Scan scan) throws IOException;  
 }

定义该protocol的实现。

    Java代码   
    
  
 public class MyEndpointImpl extends BaseEndpointCoprocessor implements  
         MyCoprocessorProtocol {  
   
     protected static Log log = LogFactory.getLog(MyEndpointImpl.class);  
   
     @Override  
     public ProtocolSignature getProtocolSignature(String protocol,  
             long version, int clientMethodsHashCode) throws IOException {  
         if (MyCoprocessorProtocol.class.getName().equals(protocol)) {  
             return new ProtocolSignature(MyCoprocessorProtocol.VERSION, null);  
         }  
         throw new IOException("Unknown protocol: " + protocol);  
     }  
   
     @Override  
     public <T> T handle(RowHandler<T> rowHandler, Scan scan) throws IOException {  
   
         InternalScanner scanner = ((RegionCoprocessorEnvironment) getEnvironment())  
                 .getRegion().getScanner(scan);  
         List<KeyValue> results = new ArrayList<KeyValue>();  
         T t = rowHandler.getInitValue();  
         try {  
             boolean hasMoreRows = false;  
   
             do {  
                 hasMoreRows = scanner.next(results);  
                 log.debug("scanner result : " + results + " hasMoreRows = "  
                         + hasMoreRows);  
                 t = rowHandler.handle(results, t);  
   
                 results.clear();  
             } while (hasMoreRows);  
         } finally {  
             scanner.close();  
         }  
         return t;  
     }  
 }  

定义一个rowHandler。

    Java代码   
    
 public interface RowHandler<T> extends Writable {  
   
     public T getInitValue();  
   
     public T handle(List<KeyValue> keyValues, T t);  
 }

定义一个reducer。

    Java代码   
    
 /**
  * Combines partial results of type {@code T} into a final result of type
  * {@code R}.
  *
  * @param <T> type of a partial result
  * @param <R> type of the combined result
  */
 public interface MyReducer<T, R> {

     /**
      * @return the value the reduction starts from
      */
     R getInitValue();

     /**
      * Merges one partial result into the running result.
      *
      * @param r the running result so far
      * @param t the partial result to merge in
      * @return the new running result
      */
     R reduce(R r, T t);
 }

定义一个client。

    Java代码   
    
  
 public class MyClient {  
   
     HTableInterface table;  
   
     public MyClient(HTableInterface table) {  
         this.table = table;  
     }  
   
     public <T, R> R call(final byte[] tableName,  
             final RowHandler<T> howHandler, final MyReducer<T, R> myReducer,  
             final Scan scan) throws Throwable {  
   
         class MyCallBack implements Batch.Callback<T> {  
             R r = myReducer.getInitValue();  
   
             R getResult() {  
                 return r;  
             }  
   
             @Override  
             public synchronized void update(byte[] region, byte[] row, T result) {  
                 r = myReducer.reduce(r, result);  
             }  
         }  
   
         MyCallBack myCallBack = new MyCallBack();  
   
         try {  
             table.coprocessorExec(MyCoprocessorProtocol.class,  
                     scan.getStartRow(), scan.getStopRow(),  
                     new Batch.Call<MyCoprocessorProtocol, T>() {  
                         @Override  
                         public T call(MyCoprocessorProtocol instance)  
                                 throws IOException {  
                             return instance.handle(howHandler, scan);  
                         }  
                     }, myCallBack);  
         } finally {  
             table.close();  
         }  
   
         return myCallBack.getResult();  
     }  
 }  

这样，我们就有了一个protocol的通用框架。
假设我们要一个count的功能。
则只需要实现对应的handler和reducer。

    Java代码   
    
  
 public class CountHandler implements RowHandler<Long> {  
   
     @Override  
     public void readFields(DataInput arg0) throws IOException {  
     }  
   
     @Override  
     public void write(DataOutput arg0) throws IOException {  
     }  
   
     @Override  
     public Long getInitValue() {  
         return 0L;  
     }  
   
     @Override  
     public Long handle(List<KeyValue> keyValues, Long t) {  
         if (!keyValues.isEmpty()) {  
             return t + 1;  
         } else {  
             return t;  
         }  
     }  
   
 }  
   
 public class CountReducer implements MyReducer<Long, Long> {  
   
     @Override  
     public Long getInitValue() {  
         return 0L;  
     }  
   
     @Override  
     public Long reduce(Long r, Long t) {  
         return r + t;  
     }  
 }  

假设我们要实现多个列的sum和全部结果的row count，我们也只需要添加对应的handler、reducer和result来实现。

    Java代码   
    
  
 public class CountAndSumResult implements Writable {  
   
     private List<Long> resultList = new ArrayList<Long>();  
   
     private Long count = 0L;  
   
     public CountAndSumResult() {  
     }  
   
     public CountAndSumResult(int resultSize) {  
         for (int i = 0; i < resultSize; i++) {  
             resultList.add(0L);  
         }  
     }  
   
     public Long getCount() {  
         return count;  
     }  
   
     public void setCount(Long count) {  
         this.count = count;  
     }  
   
     public Long getSum(int i) {  
         return resultList.get(i);  
     }  
   
     public void setSum(int i, Long sum) {  
         resultList.set(i, sum);  
     }  
   
     public int getResultSize() {  
         return resultList.size();  
     }  
   
     @Override  
     public void write(DataOutput out) throws IOException {  
         out.writeLong(count);  
         out.writeInt(resultList.size());  
         for (Long v : resultList) {  
             out.writeLong(v);  
         }  
     }  
   
     @Override  
     public void readFields(DataInput in) throws IOException {  
         count = in.readLong();  
         int size = in.readInt();  
         for (int i = 0; i < size; i++) {  
             resultList.add(in.readLong());  
         }  
     }  
   
 }  
   
   
 public class CountAndSumHandler implements RowHandler<CountAndSumResult> {  
   
     private List<String> columns = new ArrayList<String>();  
   
     public CountAndSumHandler() {  
     }  
   
     public CountAndSumHandler(List<String> columns) {  
         super();  
         this.columns = columns;  
     }  
   
     @Override  
     public void write(DataOutput out) throws IOException {  
         out.writeInt(columns.size());  
         for (String s : columns) {  
             out.writeUTF(s);  
         }  
   
     }  
   
     @Override  
     public void readFields(DataInput in) throws IOException {  
         int size = in.readInt();  
         for (int i = 0; i < size; i++) {  
             columns.add(in.readUTF());  
         }  
     }  
   
     @Override  
     public CountAndSumResult handle(List<KeyValue> keyValues,  
             CountAndSumResult t) {  
   
         if (!keyValues.isEmpty()) {  
             t.setCount(t.getCount() + 1);  
         }  
   
         for (int i = 0; i < columns.size(); i++) {  
             String column = columns.get(i);  
             for (KeyValue kv : keyValues) {  
                 if (column.equals(Bytes.toString(kv.getQualifier()))) {  
                     byte[] value = kv.getValue();  
                     if (value == null || value.length == 0) {  
                     } else {  
                         Long tValue = Bytes.toLong(value);  
                         t.setSum(i, t.getSum(i) + tValue);  
                     }  
                     break;  
                 }  
             }  
         }  
   
         return t;  
     }  
   
     @Override  
     public CountAndSumResult getInitValue() {  
         return new CountAndSumResult(columns.size());  
     }  
   
 }  
   
   
 public class CountAndSumReducer implements  
         MyReducer<CountAndSumResult, CountAndSumResult> {  
   
     @Override  
     public CountAndSumResult getInitValue() {  
         return null;  
     }  
   
     @Override  
     public CountAndSumResult reduce(CountAndSumResult r, CountAndSumResult t) {  
         if (r == null) {  
             return t;  
         }  
         if (t == null) {  
             return r;  
         }  
         r.setCount(r.getCount() + t.getCount());  
   
         int size = r.getResultSize();  
         for (int i = 0; i < size; i++) {  
             r.setSum(i, r.getSum(i) + t.getSum(i));  
         }  
         return r;  
     }  
   
 }  

有了CoprocessorProtocol，可以扩展出来很多的功能，这个机制还是很强大的。

xiao_jun_0820

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
2
评论
hbase的CoprocessorProtocol及一个简单的通用扩展实现

原文：http://zhang-xzhi-xjtu.iteye.com/blog/1926732hbase中的CoprocessorProtocol机制. CoprocessorProtocol的原理比较简单，近似于一个mapreduce框架。由client将scan分解为面向多个region的请求，并行发送请求到多个region，然后client做一个reduce的操作，得到最后的结
复制链接

扫一扫