HBase -- Debugging HBase Locally with the HBase Mini Cluster

Background and Introduction

  While testing HBase CDC I was using HBase's Observer and Endpoint coprocessors. Since coprocessors run on the server side, i.e. as code inside the RegionServer, every test round meant packaging a jar, shipping it to the server, unloading the old coprocessor and re-attaching the new one. That is very tedious, and once something goes wrong there is no way to debug it.
  To make life easier for developers, HBase (like other Hadoop components) can be run as a Mini Cluster: a Hadoop cluster simulated inside a single JVM, including HDFS, ZooKeeper, HBase and MapReduce. If you just need to run some simple code or test cases, or want to step through with breakpoints in your IDE, the Mini Cluster is a perfect fit; HBase's own unit tests already use it extensively.
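  As a quick taste of what that looks like (a minimal sketch of my own, separate from the CDC example below; the class name and table name are made up), a mini cluster can be started, exercised and shut down from a single main method:

import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class MiniClusterQuickStart {
    public static void main(String[] args) throws Exception {
        HBaseTestingUtility utility = new HBaseTestingUtility();
        utility.startMiniCluster();                            // HDFS + ZooKeeper + HBase, all in this JVM
        try (Table table = utility.createTable(TableName.valueOf("t"), Bytes.toBytes("d"))) {
            table.put(new Put(Bytes.toBytes("r1"))
                    .addColumn(Bytes.toBytes("d"), Bytes.toBytes("q"), Bytes.toBytes("v")));
            byte[] value = table.get(new Get(Bytes.toBytes("r1")))
                    .getValue(Bytes.toBytes("d"), Bytes.toBytes("q"));
            System.out.println(Bytes.toString(value));         // prints "v"
        } finally {
            utility.shutdownMiniCluster();                     // stops everything and cleans up the temp dirs
        }
    }
}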

Usage

POM

<dependencies>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>2.2.6</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-testing-util</artifactId>
        <version>2.2.6</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.13.1</version>
        <scope>test</scope>
    </dependency>
</dependencies>

Creating the Endpoint

// A ReplicationEndpoint that ships HBase WAL edits to Kafka instead of to a peer HBase cluster.
public class HbaseEndpoint extends BaseReplicationEndpoint {

    private static final Logger LOG = LoggerFactory.getLogger(HbaseEndpoint.class);

    private static final ToHRowJson TO_HROW_JSON = new ToHRowJson();

    @Override
    public UUID getPeerUUID() {
        return UUID.randomUUID();
    }

    @Override
    public boolean replicate(ReplicateContext context) {
        final List<WAL.Entry> entries = context.getEntries();

        final Map<String, List<WAL.Entry>> entriesByTable = entries.stream()
                .collect(groupingBy(entry -> entry.getKey().getTableName().getNameAsString()));

        // for each table, turn its row-level edits into JSON and send them to Kafka.
        entriesByTable.entrySet().stream().forEach(entry -> {
            final String tableName = entry.getKey();
            LOG.info("table: " + tableName);
            final List<WAL.Entry> tableEntries = entry.getValue();
            tableEntries.forEach(tblEntry -> {
                List<Cell> cells = tblEntry.getEdit().getCells();
                Map<String, List<Cell>> columnsByRow = cells.stream()
                        .collect(groupingBy(cell -> Bytes.toString(CellUtil.cloneRow(cell))));
                columnsByRow.entrySet().forEach(rowcols -> {
                    HRowJson rowJson = TO_HROW_JSON.apply(rowcols.getKey(), rowcols.getValue());
                    String jsonResult = JSONObject.toJSONString(rowJson);
                    LOG.info(jsonResult);
                    BaseProducer.produce(tableName, jsonResult);
                });
            });
        });
        return true;
    }

    @Override
    public void start() {
        LOG.info("Hbase replication to Kafka started at " + LocalDate.now());
        this.startAsync();
    }

    @Override
    public void stop() {
        LOG.info("Hbase replication to Kafka started at " + LocalDate.now());
        this.stopAsync();
    }

    @Override
    protected void doStart() {
        LOG.info("Hbase replication to Kafka doStarted at " + LocalDate.now());
        notifyStarted();
    }

    @Override
    protected void doStop() {
        LOG.info("Hbase replication to Kafka doStoped at " + LocalDate.now());
//        producer.close();
//        BaseProducer.close();
        notifyStopped();
    }
}
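
  The endpoint above calls two project-local helpers that are not shown in this post: ToHRowJson, which turns a row key and its cells into an HRowJson object, and BaseProducer, which writes the resulting JSON to Kafka. As a rough idea of the latter, here is a minimal sketch, assuming the kafka-clients library is on the classpath and a broker is reachable at localhost:9092 (both assumptions, not details from the original project):

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

// Hypothetical stand-in for the BaseProducer used by HbaseEndpoint.
public final class BaseProducer {

    private static final KafkaProducer<String, String> PRODUCER = createProducer();

    private static KafkaProducer<String, String> createProducer() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");              // assumed local broker
        props.put("key.serializer", StringSerializer.class.getName());
        props.put("value.serializer", StringSerializer.class.getName());
        return new KafkaProducer<>(props);
    }

    // Sends one JSON document to a topic named after the HBase table.
    public static void produce(String tableName, String json) {
        PRODUCER.send(new ProducerRecord<>(tableName, json));
    }

    public static void close() {
        PRODUCER.close();
    }
}

  A real implementation would also be closed when replication stops, which is what the commented-out BaseProducer.close() call in doStop() hints at.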

Creating the Test Class and Debugging with the Mini Cluster

public abstract class BaseTest {
    protected Configuration hbaseConf = HBaseConfiguration.create();
    protected HBaseTestingUtility utility;
    protected int numRegionServers;

    @Before
    public void setUp() throws Exception {
        hbaseConf.setBoolean(HConstants.REPLICATION_BULKLOAD_ENABLE_KEY/*.REPLICATION_ENABLE_KEY*/, true);
        //System.setProperty("test.build.data.basedirectory", "E:/Download/hbase-test");
        //System.setProperty("hadoop.home.dir", "F:/Dev/Hadoop-2.8.5");
        utility = new HBaseTestingUtility(hbaseConf);
        utility.startMiniCluster();
        numRegionServers = utility.getHBaseCluster().getRegionServerThreads().size();
    }

    /**
     * Adds a replication peer whose endpoint is our custom HbaseEndpoint.
     *
     * @param configuration the mini cluster configuration
     * @param peerName      the id of the replication peer
     * @param tableCFs      table -> column families to replicate (not applied in this minimal example)
     * @throws ReplicationException
     * @throws IOException
     */
    protected void addPeer(final Configuration configuration, String peerName, Map<TableName, List<String>> tableCFs)
            throws ReplicationException, IOException {
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Admin admin = connection.getAdmin()) {
            ReplicationPeerConfig peerConfig = ReplicationPeerConfig.newBuilder()
                    .setClusterKey(ZKConfig.getZooKeeperClusterKey(configuration))
                    .setReplicationEndpointImpl(HbaseEndpoint.class.getName())
                    .build();
            admin.addReplicationPeer(peerName, peerConfig);
        }
    }

    @After
    public void tearDown() throws Exception {
        if (utility != null) {
            utility.shutdownMiniCluster();
        }
    }
}

public class TestKafkaReplication extends BaseTest {

    public static final Logger LOG = LoggerFactory.getLogger(TestKafkaReplication.class);

    private static final String PEER_NAME = "hbase.cdc.kafka";
    protected final TableName TABLE_NAME = TableName.valueOf("testings");
    protected final String ROWKEY = "rk-%s";
    protected final String COLUMN_FAMILY = "d";
    protected final String QUALIFIER = "q";
    protected final String VALUE = "v";

    @Test
    public void testCustomReplicationEndpoint() throws Exception {
        try {
            Map<TableName, List<String>> tableCfs = new HashMap<>();
            List<String> cfs = new ArrayList<>();
            cfs.add(COLUMN_FAMILY);
            tableCfs.put(TABLE_NAME, cfs);

            createTestTable();
            addPeer(utility.getConfiguration(), PEER_NAME, tableCfs);
            int numberOfRecords = 10;
            addData(numberOfRecords);
        } finally {
            removePeer();
        }
    }

    /**
     * Create the hbase table with a scope set to Global
     * @throws IOException
     */
    private void createTestTable() throws IOException {
        try(HBaseAdmin hBaseAdmin = utility.getHBaseAdmin()) {
            final HTableDescriptor hTableDescriptor = new HTableDescriptor(TABLE_NAME);
            final HColumnDescriptor hColumnDescriptor = new HColumnDescriptor(COLUMN_FAMILY);
            hColumnDescriptor.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
            hTableDescriptor.addFamily(hColumnDescriptor);
            hBaseAdmin.createTable(hTableDescriptor);
        }
        utility.waitUntilAllRegionsAssigned(TABLE_NAME);
    }

    /**
     * Adds data to the previously created HBase table
     * @throws IOException
     */
    private void addData(int numberOfRecords) throws IOException {
        try(Table hTable = ConnectionFactory.createConnection(utility.getConfiguration()).getTable(TABLE_NAME)) {
            for(int i = 0; i < numberOfRecords; i++) {
                Put put = new Put(toBytes(String.format(ROWKEY, i)));
                put.addColumn(toBytes(COLUMN_FAMILY), toBytes(QUALIFIER), toBytes(VALUE));
                hTable.put(put);
            }
        }
    }

    /**
     * Removes the peer
     * @throws IOException
     * @throws ReplicationException
     */
    private void removePeer() throws IOException, ReplicationException {
        try(ReplicationAdmin replicationAdmin = new ReplicationAdmin(utility.getConfiguration())) {
            replicationAdmin.removePeer(PEER_NAME);
        }
    }
}
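
  The test above only writes data; verification happens by watching the log output from HbaseEndpoint.replicate, which is also where a breakpoint is most useful. If you want an end-to-end assertion instead, one option is to poll the topic from the test. This is a sketch under the assumptions that BaseProducer publishes to a topic named after the table and that a Kafka broker is reachable at localhost:9092; KafkaTopicProbe is a made-up helper, not part of the original code:

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

// Hypothetical helper: counts how many records have landed on a topic within a timeout.
public final class KafkaTopicProbe {

    public static int countRecords(String topic, int expected, long timeoutMs) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");              // assumed local broker
        props.put("group.id", "hbase-cdc-test");
        props.put("auto.offset.reset", "earliest");
        props.put("key.deserializer", StringDeserializer.class.getName());
        props.put("value.deserializer", StringDeserializer.class.getName());

        int count = 0;
        long deadline = System.currentTimeMillis() + timeoutMs;
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList(topic));
            while (count < expected && System.currentTimeMillis() < deadline) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(500));
                count += records.count();
            }
        }
        return count;
    }
}

  With that in place, the test could assert that KafkaTopicProbe.countRecords(TABLE_NAME.getNameAsString(), numberOfRecords, 30_000) returns numberOfRecords before removePeer() is called.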

Problems

  If you take the code above as-is (or copy it from somewhere else) and run it on Windows, you will most likely hit errors such as the following:

  1. All datanodes are bad. Aborting…
java.io.IOException: All datanodes [DatanodeInfoWithStorage[127.0.0.1:2785,DS-384e5701-c6b0-453f-a79d-a14c96a12397,DISK]] are bad. Aborting...

	at org.apache.hadoop.hdfs.DataStreamer.handleBadDatanode(DataStreamer.java:1530)
	at org.apache.hadoop.hdfs.DataStreamer.setupPipelineForAppendOrRecovery(DataStreamer.java:1465)
	at org.apache.hadoop.hdfs.DataStreamer.processDatanodeError(DataStreamer.java:1237)
	at org.apache.hadoop.hdfs.DataStreamer.run(DataStreamer.java:657)
  2. EOFException: Unexpected EOF while trying to read response from server
java.io.EOFException: Unexpected EOF while trying to read response from server
	at org.apache.hadoop.hdfs.protocolPB.PBHelperClient.vintPrefixed(PBHelperClient.java:402)
	at org.apache.hadoop.hdfs.protocol.datatransfer.PipelineAck.readFields(PipelineAck.java:213)
	at org.apache.hadoop.hdfs.DataStreamer$ResponseProcessor.run(DataStreamer.java:1073)
  3. The system cannot find the path specified (系统找不到指定的路径)
Caused by: 3: 系统找不到指定的路径。

	at org.apache.hadoop.io.nativeio.NativeIO.renameTo0(Native Method)
	at org.apache.hadoop.io.nativeio.NativeIO.renameTo(NativeIO.java:877)
	at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.moveBlockFiles(FsDatasetImpl.java:890)
	... 7 more
  4. Failed to move meta file for ReplicaBeingWritten
IOException in BlockReceiver.run(): 
java.io.IOException: Failed to move meta file for ReplicaBeingWritten, blk_1073741828_1004, RBW
  getNumBytes()     = 7
  getBytesOnDisk()  = 7
  getVisibleLength()= 7
  getVolume()       = F:\Cache\IDEA202001\WorkSpace\Wisers\SparkPro\test-hbase2-cdc\target\test-data\f54971a6-6884-3ae6-8bb0-03479e546d76\cluster_510be0ed-46bb-cf4d-096a-78f10d101f39\dfs\data\data2\current
  getBlockFile()    = F:\Cache\IDEA202001\WorkSpace\Wisers\SparkPro\test-hbase2-cdc\target\test-data\f54971a6-6884-3ae6-8bb0-03479e546d76\cluster_510be0ed-46bb-cf4d-096a-78f10d101f39\dfs\data\data2\current\BP-1066010076-127.0.0.1-1613700403468\current\rbw\blk_1073741828
  bytesAcked=7
  bytesOnDisk=7 from F:\Cache\IDEA202001\WorkSpace\Wisers\SparkPro\test-hbase2-cdc\target\test-data\f54971a6-6884-3ae6-8bb0-03479e546d76\cluster_510be0ed-46bb-cf4d-096a-78f10d101f39\dfs\data\data2\current\BP-1066010076-127.0.0.1-1613700403468\current\rbw\blk_1073741828_1004.meta to F:\Cache\IDEA202001\WorkSpace\Wisers\SparkPro\test-hbase2-cdc\target\test-data\f54971a6-6884-3ae6-8bb0-03479e546d76\cluster_510be0ed-46bb-cf4d-096a-78f10d101f39\dfs\data\data2\current\BP-1066010076-127.0.0.1-1613700403468\current\finalized\subdir0\subdir0\blk_1073741828_1004.meta
	at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.moveBlockFiles(FsDatasetImpl.java:892)
	at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.BlockPoolSlice.addBlock(BlockPoolSlice.java:315)
	at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl.addFinalizedBlock(FsVolumeImpl.java:879)
	at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.finalizeReplica(FsDatasetImpl.java:1786)
	at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.finalizeBlock(FsDatasetImpl.java:1752)
	at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.finalizeBlock(BlockReceiver.java:1441)
	at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.run(BlockReceiver.java:1398)
	at java.lang.Thread.run(Thread.java:748)

Solution

  Reference: Running Hbase Testing Utility On Windows

  Since I already have the local Hadoop winutils tools and the corresponding environment variables on this machine, I went straight to step 4 of that article, i.e. adding the two local-path settings in the setUp method. In other words, simply uncomment the two System.setProperty lines in the setUp code above:

System.setProperty("test.build.data.basedirectory", "E:/Download/hbase-test");
System.setProperty("hadoop.home.dir", "F:/Dev/Hadoop-2.8.5");