HBase 2.0版本协处理器 Endpoint使用

在 HBase 2.0 之前使用协处理器 Endpoint 时,使用的方式是实现 CoprocessorService 和 Coprocessor 这两个接口。但是在 2.0 版本中,这么做虽然在 RegionServer 的日志中显示 Endpoint 加载成功了,但在 RegionServer 的 UI 界面上并没有显示已加载的 Endpoint 协处理器,也没有报错日志,很让人头疼。同时 CoprocessorService 被标记为过期,但查看源码只说明将在 3.0 版本移除,并没有给出替代方案,甚至官网的 Endpoint 教程中使用的也还是实现 CoprocessorService 和 Coprocessor 这两个接口的方式。

下面以实现统计表中 rowkey 的个数(统计行数)为例:

老式写法:

  1. proto 脚本:
// Generated-code options for the row-count Endpoint coprocessor.
option java_package = "com.ljy.coprocessor";
option java_outer_classname = "CountRows";
// Emit abstract service stubs — required for HBase coprocessor Endpoints.
option java_generic_services = true;
option java_generate_equals_and_hash = true;
option optimize_for = SPEED;

// Empty request: counting rows needs no parameters.
message CountRequest {
}

// Response carrying the number of rows counted in one region.
message CountResponse {
    required int64 count = 1 [default=0];
}

// RPC service exposed by the Endpoint; invoked once per region.
service CountRowService {
    rpc getCountRows(CountRequest)
        returns (CountResponse);
}
  2. Endpoint 代码:
package com.ljy.coprocessor;

import com.google.protobuf.RpcCallback;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.CoprocessorService;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.shaded.protobuf.ResponseConverter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class CountRowsSumEndpoint extends CountRows.CountRowService implements CoprocessorService, Coprocessor {

    private static final Log LOG = LogFactory.getLog(CountRowsSumEndpoint.class);

    /** Region environment captured in start(); needed to open a scanner on the hosting region. */
    private RegionCoprocessorEnvironment rce = null;

    @Override
    public void start(CoprocessorEnvironment env) throws IOException {
        // An Endpoint is always loaded into a region, so this cast is expected to succeed.
        rce = (RegionCoprocessorEnvironment) env;
        LOG.info("=================CountRowsSumEndpoint#start  be called");
    }

    @Override
    public void stop(CoprocessorEnvironment env) throws IOException {
        LOG.info("=================CountRowsSumEndpoint#stop  be called");
    }

    /**
     * RPC entry point: counts the rows of this region and hands the result
     * (or the error, via the controller) to the callback.
     */
    @Override
    public void getCountRows(RpcController controller, CountRows.CountRequest request, RpcCallback<CountRows.CountResponse> done) {
        CountRows.CountResponse response = null;
        try {
            response = CountRows.CountResponse.newBuilder().setCount(getCount()).build();
        } catch (IOException e) {
            ResponseConverter.setControllerException(controller, e);
        }
        done.run(response);
    }

    /**
     * Scans the whole region and counts distinct row keys.
     *
     * @return number of rows seen, or 0 when the environment was never initialized
     * @throws IOException if the region scan fails
     */
    private long getCount() throws IOException {
        if (rce == null) {
            LOG.error("===========rce is null");
            return 0;
        }
        long rows = 0;
        byte[] lastRow = null;
        try (final RegionScanner scanner = rce.getRegion().getScanner(new Scan())) {
            final List<Cell> batch = new ArrayList<>();
            while (true) {
                final boolean moreRows = scanner.nextRaw(batch);
                for (Cell cell : batch) {
                    // Count a row only when its key differs from the previous one.
                    if (lastRow == null || !CellUtil.matchingRow(cell, lastRow)) {
                        lastRow = CellUtil.cloneRow(cell);
                        rows++;
                        break;
                    }
                }
                batch.clear();
                if (!moreRows) {
                    break;
                }
            }
        }
        return rows;
    }

    @Override
    public Service getService() {
        return this;
    }
}
  3. 客户端测试:
package com.ljy.coprocessor;

import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;

import java.io.IOException;

public class RowCountCallable implements Batch.Call<CountRows.CountRowService, Long> {

    /** Request forwarded unchanged to every region's endpoint instance. */
    private final CountRows.CountRequest request;

    public RowCountCallable(CountRows.CountRequest request) {
        this.request = request;
    }

    /**
     * Invokes the endpoint on one region and blocks until its response arrives.
     */
    @Override
    public Long call(CountRows.CountRowService instance) throws IOException {
        final CoprocessorRpcUtils.BlockingRpcCallback<CountRows.CountResponse> callback =
                new CoprocessorRpcUtils.BlockingRpcCallback<>();
        instance.getCountRows(null, request, callback);
        final CountRows.CountResponse response = callback.get();
        if (response.hasCount()) {
            return response.getCount();
        }
        return 0L;
    }
}
 @Test
    public void testRowCountEndpoint() {
        // Invoke the CountRowService endpoint on every region of the table
        // and sum the per-region row counts.
        try (final Table table = HBaseUtils.getTable("testDepFilter")) {
            CountRows.CountRequest request = CountRows.CountRequest.getDefaultInstance();
            final Map<byte[], Long> longMap = table.coprocessorService(CountRows.CountRowService.class, null, null, new RowCountCallable(request));
            long totalRows = 0;
            final Set<Map.Entry<byte[], Long>> entries = longMap.entrySet();
            for (Map.Entry<byte[], Long> entry : entries) {
                totalRows += entry.getValue();
                System.out.println("Region:" + Bytes.toString(entry.getKey()) + "包含" + entry.getValue() + "记录");
            }
            System.out.println("总记录数:" + totalRows);
        } catch (Throwable e) {
            // Fail the test instead of swallowing the error: the original
            // printStackTrace-only catch let a broken endpoint pass silently.
            throw new AssertionError("coprocessorService call failed", e);
        }
    }


    /**
     * Releases the shared HBase connection and thread pool after the test run.
     */
    @After
    public void close() {
        HBaseUtils.closePool();
    }
package com.ljy.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;

import java.io.Closeable;
import java.io.IOException;
import java.util.Objects;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class HBaseUtils {

    /**
     * Thread pool backing the shared connection.
     * Creating a Table handle is cheap, but the underlying Connection is
     * expensive, so a single Connection with a bounded pool is shared by
     * all callers and handles are fetched per use.
     */
    private static ExecutorService pool = Executors.newFixedThreadPool(10);

    /** Single shared cluster connection, created eagerly at class load. */
    private static Connection conn;

    static {
        try {
            conn = ConnectionFactory.createConnection(getConf(), pool);
        } catch (IOException e) {
            // Fail fast: the original printStackTrace left conn null, which
            // later surfaced as an opaque NPE in getTable()/getAdmin().
            throw new ExceptionInInitializerError(e);
        }
    }

    /** Builds a configuration from core-site.xml / hbase-site.xml on the classpath. */
    private static Configuration getConf() {
        Configuration conf = HBaseConfiguration.create();
        conf.addResource(Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("core-site.xml")));
        conf.addResource(Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("hbase-site.xml")));
        return conf;
    }

    /** Returns the shared connection; do not close it directly, use {@link #closePool()}. */
    public static Connection getConnection() {
        return conn;
    }

    /**
     * @deprecated misspelled name kept for backward compatibility;
     *             use {@link #getConnection()} instead.
     */
    @Deprecated
    public static Connection getConnecttion() {
        return getConnection();
    }

    /**
     * Gets a lightweight Table handle for the given table.
     *
     * @param tableName name of the HBase table
     * @return a Table handle; the caller is responsible for closing it
     * @throws IOException if the handle cannot be created
     */
    public static Table getTable(String tableName) throws IOException {
        return conn.getTable(TableName.valueOf(tableName));
    }

    /** Returns an Admin instance; the caller is responsible for closing it. */
    public static Admin getAdmin() throws IOException {
        return conn.getAdmin();
    }

    /**
     * Closes each non-null resource, reporting but not propagating individual
     * close failures so the remaining resources are still closed.
     */
    public static void close(Closeable... closes) {
        for (Closeable closeable : closes) {
            if (closeable != null) {
                try {
                    closeable.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Shuts down the shared connection and thread pool; call once at teardown. */
    public static void closePool() {
        if (conn != null) {
            close(conn);
            // Null out so a later accidental use fails visibly instead of
            // operating on a closed connection.
            conn = null;
        }

        if (pool != null) {
            pool.shutdown();
            pool = null;
        }
    }
}
  4. 以上代码使用静态加载或者动态加载,在 RegionServer 的日志中都显示被 loaded,但是测试的时候因为不走 start 方法,所以没法拿到 RegionCoprocessorEnvironment,故在 getCountRows 方法中获取 region 时报空指针异常.

新版本 Endpoint 代码:

新版本中采用实现 RegionCoprocessor 接口就可以实现 Endpoint 类型的协处理器了.

package com.ljy.coprocessor;

import com.google.protobuf.RpcCallback;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.shaded.protobuf.ResponseConverter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class CountRowsSumEndpoint extends CountRows.CountRowService implements RegionCoprocessor {

    private static final Log LOG = LogFactory.getLog(CountRowsSumEndpoint.class);

    /** Environment handed over in start(); gives access to the hosting region. */
    private RegionCoprocessorEnvironment rce = null;

    @Override
    public void start(CoprocessorEnvironment env) throws IOException {
        // A region coprocessor always receives a region-scoped environment.
        rce = (RegionCoprocessorEnvironment) env;
        LOG.info("=================CountRowsSumEndpoint#start  be called");
    }

    @Override
    public void stop(CoprocessorEnvironment env) throws IOException {
        LOG.info("=================CountRowsSumEndpoint#stop  be called");
    }

    /**
     * RPC entry point: counts the rows of this region and delivers the result
     * (or the error, via the controller) through the callback.
     */
    @Override
    public void getCountRows(RpcController controller, CountRows.CountRequest request, RpcCallback<CountRows.CountResponse> done) {
        CountRows.CountResponse response = null;
        try {
            response = CountRows.CountResponse.newBuilder().setCount(getCount()).build();
        } catch (IOException e) {
            ResponseConverter.setControllerException(controller, e);
        }
        done.run(response);
    }

    /**
     * Scans the whole region and counts distinct row keys.
     *
     * @return number of rows seen, or 0 when the environment was never initialized
     * @throws IOException if the region scan fails
     */
    private long getCount() throws IOException {
        if (rce == null) {
            LOG.error("===========rce is null");
            return 0;
        }
        long rows = 0;
        byte[] lastRow = null;
        try (final RegionScanner scanner = rce.getRegion().getScanner(new Scan())) {
            final List<Cell> batch = new ArrayList<>();
            boolean moreRows = true;
            while (moreRows) {
                moreRows = scanner.nextRaw(batch);
                for (Cell cell : batch) {
                    // Count a row only when its key differs from the previous one.
                    if (lastRow == null || !CellUtil.matchingRow(cell, lastRow)) {
                        lastRow = CellUtil.cloneRow(cell);
                        rows++;
                        break;
                    }
                }
                batch.clear();
            }
        }
        return rows;
    }

    /**
     * HBase 2.0 replacement for CoprocessorService#getService: expose the
     * protobuf service(s) implemented by this coprocessor.
     */
    @Override
    public Iterable<Service> getServices() {
        return Collections.singleton(this);
    }

}

修改后不管是采用动态加载还是静态加载,都可以正常运行了.

以上仅仅当笔记.后面有时间可以好好梳理一下.

感叹!!!

  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 4
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值