1.背景
在client端访问HBase的时候,经常会遇到多次串行查询(即client端应用程序把每次从HBase中查询出的结果中的一些数据作为下一步查询的条件),这种场景往往导致不必要的网络消耗。我们可以利用HBase提供的Coprocessor机制把这类计算下推到Region Server端执行,从而减少这种不必要的网络开销,达到性能上的优化。
2.寻找HBase依赖的protobuf版本
在实际生产环境往往已经安装了某个版本的HBase,由于HBase的Client和Server之间采用protobuf协议,所以在实际动手之前我们先找一下和实际HBase版本相匹配的protobuf的版本号。由于本人用的是HBase的CDH版本,版本号为1.2.0-cdh5.14.2。所以我将HBase的hbase-client和hbase-server引入我的Intellij中,并打开依赖树找到依赖的protobuf版本号为2.5.0,如下图:
3.Mac安装protobuf
由于我平时用Mac来开发,所以我需要在Mac OS上安装Protobuf。
3.1.下载protobuf
https://github.com/protocolbuffers/protobuf/releases/tag/v2.5.0 下载Protobuf包。
3.2.解压、编译、安装protobuf
在Mac的终端中按照以下步骤进行安装:
unzip protobuf-2.5.0.zip
cd protobuf-2.5.0/
./configure
make
make check
make install
安装中我遇到以下错误:
configure: error: C compiler cannot create executables
原因是gcc没安装,解决了下,参照的是https://cn.aliyun.com/jiaocheng/383415.html
4.创建proto文件
GraphTravesalService.proto
// proto2 definition for the graph-traversal coprocessor endpoint.
// Compiled with protoc 2.5.0 to match the protobuf version HBase 1.2.0-cdh5.14.2 depends on.

// Java package of the generated classes.
option java_package = "com.xxx.xxx.graph.protocol";
// Single outer class wrapping the generated messages and service stubs.
option java_outer_classname = "GraphTravesalProtocol";
// Generate abstract Service classes — required for HBase coprocessor endpoints.
option java_generic_services = true;
option java_generate_equals_and_hash = true;
option optimize_for = SPEED;
// Request: the starting vertex and how many hops to traverse.
message TraversalRequest {
required string vertex_id = 1;
required string travesal_step = 2;
}
// Response: adjacent vertex ids joined with ';'.
message TraversalResponse {
required string vertex_ids = 1;
}
// RPC endpoint implemented by the GraphTravesal coprocessor on each region.
service TraversalService {
rpc getAdjacentVertices (TraversalRequest)
returns (TraversalResponse);
}
5.建立project
pom.xml文件内容如下
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>xxx-graph-java</artifactId>
<groupId>com.xxx.xxx</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>graph-hbase-coprocessor</artifactId>
<!-- Cloudera repository: required to resolve the CDH-flavored HBase artifacts. -->
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<properties>
<!-- Must match the HBase version installed on the cluster. -->
<hbase.version>1.2.0-cdh5.14.2</hbase.version>
<!-- Must match the protobuf version this HBase release depends on. -->
<protobuf.version>2.5.0</protobuf.version>
</properties>
<!-- All dependencies are 'provided': the region server classpath already ships
     HBase and protobuf-java, so they must NOT be bundled into the coprocessor jar. -->
<dependencies>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>${protobuf.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Build a self-contained jar (provided deps excluded) to drop into
     the region server's lib directory. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
6.将proto文件编译为java文件
protoc --java_out=/Users/xxx/workspaces/xxx-graph/xxx-graph/xxx-graph-java/graph-hbase-coprocessor/src/main/java/ GraphTravesalService.proto
7.编写Coprocessor调用类GraphTravesal
package com.xxx.xxx.graph.coprocessor;
import com.xxx.xxx.graph.protocol.GraphTravesalProtocol;
import com.google.protobuf.RpcCallback;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.coprocessor.CoprocessorService;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
public class GraphTravesal extends GraphTravesalProtocol.TraversalService
implements Coprocessor, CoprocessorService {
private RegionCoprocessorEnvironment env;
private final static String EDGE_LABEL = "join";
private final static String DIRECTION_IN = "in";
@Override
public Service getService() {
return this;
}
@Override
public void start(CoprocessorEnvironment coprocessorEnvironment) throws IOException {
if (coprocessorEnvironment instanceof RegionCoprocessorEnvironment) {
this.env = (RegionCoprocessorEnvironment) coprocessorEnvironment;
} else {
throw new IOException("Must be loaded on a table region!");
}
}
@Override
public void stop(CoprocessorEnvironment coprocessorEnvironment) throws IOException {
}
@Override
public void getAdjacentVertices(RpcController controller, GraphTravesalProtocol.TraversalRequest request, RpcCallback<GraphTravesalProtocol.TraversalResponse> done) {
String vertexId = request.getVertexId();
if(vertexId == null || "".equals(vertexId)){
throw new NullPointerException("you need specify the vertexIds");
}
Result result = null;
try {
result = env.getRegion().get(new Get(vertexId.getBytes()));
} catch (IOException e) {
e.printStackTrace();
}
StringBuffer adjacentVertexIds = new StringBuffer();
Map famliyMap = result.getFamilyMap((EDGE_LABEL + "_" + DIRECTION_IN).getBytes());
Iterator iterator = famliyMap.keySet().iterator();
while (iterator.hasNext()){
byte[] edgeId = (byte[]) iterator.next();
adjacentVertexIds.append(new String(edgeId));
adjacentVertexIds.append(";");
}
GraphTravesalProtocol.TraversalResponse response = GraphTravesalProtocol.TraversalResponse.newBuilder().setVertexIds(adjacentVertexIds.toString()).build();
done.run(response);
}
}
8.将工程打成jar包放入HBase region server lib下面
CDH部署的HBase lib路径如下
/opt/cloudera/parcels/CDH/lib/hbase/lib
9.在Cloudera Manager上配置coprocessor
10.在Cloudera Manager上重启HBase
11.编写GraphTravesalClient来测试调用我们写的coprocessor
package com.xxx.xxx.graph.client;
import com.xxx.xxx.graph.protocol.GraphTravesalProtocol;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.ipc.BlockingRpcCallback;
import org.apache.hadoop.hbase.ipc.ServerRpcController;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
/**
 * Test client: invokes the GraphTravesal coprocessor endpoint on every region
 * of "graph_table" and prints the ';'-joined adjacent vertex ids returned by
 * each region.
 */
public class GraphTravesalClient {
public static void main(String[] args) throws Throwable {
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "zk1:2181,zk2:2181,zk3:2181");
final GraphTravesalProtocol.TraversalRequest req = GraphTravesalProtocol.TraversalRequest.newBuilder().setVertexId("454_10001_33c0bba3150baf4e2314f5feed0f5132045d8e390155da61e40d4886bc343bb6_1").setTravesalStep("1").build();
// try-with-resources: HTable is Closeable and was previously leaked.
try (HTable table = new HTable(conf, "graph_table")) {
// start/end row null => run the endpoint on all regions of the table.
Map<byte[], String> tmpRet = table.coprocessorService(GraphTravesalProtocol.TraversalService.class, null, null, new Batch.Call<GraphTravesalProtocol.TraversalService, String>() {
@Override
public String call(GraphTravesalProtocol.TraversalService traversalService) throws IOException {
ServerRpcController controller = new ServerRpcController();
BlockingRpcCallback<GraphTravesalProtocol.TraversalResponse> rpc = new BlockingRpcCallback<>();
traversalService.getAdjacentVertices(controller, req, rpc);
// Previously server-side failures were silently ignored;
// propagate them so the caller sees the real error.
if (controller.failedOnException()) {
throw controller.getFailedOn();
}
return rpc.get().getVertexIds();
}
});
// Iterate entries directly — byte[] keys don't hash by content,
// so a keySet iterator + get() lookup was fragile anyway.
for (Map.Entry<byte[], String> entry : tmpRet.entrySet()) {
System.out.println(entry.getValue());
}
}
}
}
12.HBase入测试数据如下
13.测试结果如下