1、Hadoop RPC使用
在正式介绍Hadoop RPC基本框架之前,先介绍怎么样使用它。Hadoop RPC主要对外提供了两种接口。
public static VersionedProtocol getProxy/waitForProxy():构造一个客户端代理对象(该对象实现了某个协议),用于向服务器端发送RPC请求。
public static Server getServer():为某个协议(实际上是Java接口)实例构造一个服务器对象,用于处理客户端发送的请求。
首先定义RPC协议
package MyRPC;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.VersionedProtocol;
/**
 * RPC protocol used by the demo server and client in this example.
 *
 * <p>Extends {@link VersionedProtocol}, so implementations must also provide
 * {@code getProtocolVersion}.
 */
public interface MyRPCProtocol extends VersionedProtocol {

  /**
   * The single remote operation exposed by this protocol.
   *
   * @param t request payload
   * @return response payload; its meaning is defined by the implementation
   */
  Text test(Text t);
}
然后构造并启动RPC Server。直接使用静态方法getServer()构造一个RPC Server,并调用函数start()启动该Server。
package MyRPC;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RPC.Server;
/**
 * Demo RPC server that implements {@link MyRPCProtocol} itself and serves it
 * on localhost:8888.
 */
public class RPCServer implements MyRPCProtocol {

  /** Hadoop RPC server that dispatches incoming calls to this instance. */
  Server server = null;

  /**
   * Builds the RPC server around this instance, starts it, and then calls
   * {@code join()} on it, so construction does not return until {@code join()}
   * does (presumably when the server is stopped).
   *
   * @throws Exception if the server cannot be created or started
   */
  public RPCServer() throws Exception {
    Configuration conf = new Configuration();
    server = RPC.getServer(this, "localhost", 8888, conf);
    server.start();
    server.join();
  }

  /** Entry point: constructing the object is what runs the server. */
  public static void main(String[] args) throws Exception {
    new RPCServer();
  }

  /**
   * Reports the protocol version implemented by this server.
   *
   * @param arg0 protocol name (unused)
   * @param arg1 client-side protocol version (unused)
   * @return always 1
   */
  @Override
  public long getProtocolVersion(String arg0, long arg1) throws IOException {
    return 1L;
  }

  /**
   * Answers "1" when the request text is exactly "RPC", and "0" otherwise.
   *
   * @param t request text
   * @return a new {@code Text} holding "1" or "0"
   */
  @Override
  public Text test(Text t) {
    String answer = "RPC".equals(t.toString()) ? "1" : "0";
    return new Text(answer);
  }
}
最后构造RPC Client,并发送RPC请求。使用静态方法waitForProxy()构造客户端代理对象,直接通过代理对象调用远程端的方法。
package MyRPC;
import java.net.InetSocketAddress;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.RPC;
/**
 * Demo RPC client: obtains a proxy for {@link MyRPCProtocol} from the server at
 * localhost:8888 and invokes the remote {@code test} method through it.
 */
public class RPCClient {

  /** Client-side proxy; every method call on it is sent to the server. */
  private final MyRPCProtocol proto;

  /**
   * Obtains the proxy for the server at localhost:8888, requesting protocol
   * version 1.
   *
   * @throws Exception if the proxy cannot be obtained
   */
  public RPCClient() throws Exception {
    InetSocketAddress addr = new InetSocketAddress("localhost", 8888);
    proto = (MyRPCProtocol) RPC.waitForProxy(MyRPCProtocol.class, 1, addr,
        new Configuration());
  }

  /**
   * Sends {@code s} to the server and prints the reply to standard output.
   *
   * @param s text to send
   */
  public void call(String s) {
    System.out.println(proto.test(new Text(s)));
  }

  /**
   * Releases the proxy obtained in the constructor. Without this the
   * underlying IPC client is never released by this class.
   */
  public void close() {
    RPC.stopProxy(proto);
  }

  /** Entry point: performs one call and always releases the proxy afterwards. */
  public static void main(String[] args) throws Exception {
    RPCClient client = new RPCClient();
    try {
      client.call("RPC");
    } finally {
      client.close();
    }
  }
}
2、Hadoop1.2.1源码分析
首先当客户端调用waitForProxy方法时
proto = (MyRPCProtocol) RPC.waitForProxy(MyRPCProtocol.class, 1, addr,
new Configuration());
实际上调用了org.apache.hadoop.ipc.RPC中的waitForProxy方法。客户端调用的是4个参数的重载版本,它最终会转发到下面这个带rpcTimeout和connTimeout参数的版本:
/**
* Get a proxy connection to a remote server, retrying until the server
* becomes reachable or the connection timeout expires.
*
* Each attempt delegates to getProxy(). A ConnectException or a
* SocketTimeoutException is logged and remembered, and the attempt is
* repeated after a one second pause. Any other IOException raised by
* getProxy() is not caught here and propagates to the caller immediately.
*
* @param protocol protocol class
* @param clientVersion client version
* @param addr remote address
* @param conf configuration to use
* @param rpcTimeout timeout passed through unchanged to getProxy()
* @param connTimeout time in milliseconds before giving up
* @return the proxy
* @throws IOException if the far end threw a RemoteException, or the last
*         connection failure once connTimeout milliseconds have elapsed
*/
static VersionedProtocol waitForProxy(
Class<? extends VersionedProtocol> protocol,
long clientVersion,
InetSocketAddress addr,
Configuration conf,
int rpcTimeout,
long connTimeout)
throws IOException {
// Reference point for measuring how long we have been retrying.
long startTime = System.currentTimeMillis();
// Most recent connection failure; rethrown if we run out of time.
IOException ioe;
while (true) {
try {
return getProxy(protocol, clientVersion, addr, conf, rpcTimeout);
} catch(ConnectException se) { // namenode has not been started
LOG.info("Server at " + addr + " not available yet, Zzzzz...");
ioe = se;
} catch(SocketTimeoutException te) { // namenode is busy
LOG.info("Problem connecting to server: " + addr);
ioe = te;
}
// check if timed out
// (the subtraction form avoids computing startTime + connTimeout, which
// would overflow for very large connTimeout values)
if (System.currentTimeMillis()-connTimeout >= startTime) {
throw ioe;
}
// wait for retry
try {
Thread.sleep(1000);
} catch (InterruptedException ie) {
// IGNORE: an interrupt only cuts the pause short; the loop retries anyway
}
}
waitForProxy在循环中调用org.apache.hadoop.ipc.RPC中的getProxy()方法,该调用经过重载方法的转发,最终到达下面这个参数最完整的版本:
/**
 * Builds a client-side dynamic proxy that implements {@code protocol} and
 * forwards every method call to the server at {@code addr}.
 *
 * @param protocol protocol interface the proxy must implement
 * @param clientVersion protocol version the client expects
 * @param addr address of the remote server
 * @param ticket user information handed to the invoker
 * @param conf configuration to use
 * @param factory socket factory handed to the invoker
 * @param rpcTimeout RPC timeout handed to the invoker
 * @param connectionRetryPolicy retry policy handed to the invoker
 * @param checkVersion whether to run the version check on the new proxy
 * @return the proxy object
 * @throws IOException if the invoker cannot be created or the version check fails
 */
public static VersionedProtocol getProxy(
    Class<? extends VersionedProtocol> protocol,
    long clientVersion, InetSocketAddress addr, UserGroupInformation ticket,
    Configuration conf, SocketFactory factory, int rpcTimeout,
    RetryPolicy connectionRetryPolicy,
    boolean checkVersion) throws IOException {

  // SASL support is initialised only when security is switched on.
  if (UserGroupInformation.isSecurityEnabled()) {
    SaslRpcServer.init(conf);
  }

  // The invoker is the handler that receives every call made on the proxy.
  final Invoker handler = new Invoker(
      protocol, addr, ticket, conf, factory, rpcTimeout, connectionRetryPolicy);
  final Class[] interfaces = new Class[] { protocol };
  final VersionedProtocol stub = (VersionedProtocol) Proxy.newProxyInstance(
      protocol.getClassLoader(), interfaces, handler);

  if (!checkVersion) {
    return stub;
  }
  checkVersion(protocol, clientVersion, stub);
  return stub;
}
客户端通过代理对象调用协议方法时,动态代理会把这次调用转交给org.apache.hadoop.ipc.RPC的内部类Invoker(它实现了InvocationHandler接口)的invoke方法:
/**
 * Invocation handler behind every client-side proxy: each method call made
 * on the proxy is turned into an Invocation and sent through the IPC client.
 */
private static class Invoker implements InvocationHandler {
// Identifies the connection (address, protocol, ticket, ...) used for calls.
private Client.ConnectionId remoteId;
// IPC client that actually performs the calls.
private Client client;
// Guards close() so the client is stopped at most once per invoker.
private boolean isClosed = false;
/**
 * Resolves the connection id for the target server and obtains the IPC
 * client from CLIENTS (presumably a shared client cache; not shown here).
 */
private Invoker(Class<? extends VersionedProtocol> protocol,
InetSocketAddress address, UserGroupInformation ticket,
Configuration conf, SocketFactory factory,
int rpcTimeout, RetryPolicy connectionRetryPolicy) throws IOException {
this.remoteId = Client.ConnectionId.getConnectionId(address, protocol,
ticket, rpcTimeout, connectionRetryPolicy, conf);
this.client = CLIENTS.getClient(conf, factory);
}
/**
 * Wraps the method and its arguments in an Invocation, sends it with
 * client.call() and returns the unwrapped result. The elapsed time is
 * measured and logged only when debug logging is enabled.
 */
public Object invoke(Object proxy, Method method, Object[] args)
throws Throwable {
final boolean logDebug = LOG.isDebugEnabled();
long startTime = 0;
if (logDebug) {
startTime = System.currentTimeMillis();
}
// The remote call itself; the reply arrives wrapped in an ObjectWritable.
ObjectWritable value = (ObjectWritable)
client.call(new Invocation(method, args), remoteId);
if (logDebug) {
long callTime = System.currentTimeMillis() - startTime;
LOG.debug("Call: " + method.getName() + " " + callTime);
}
return value.get();
}
/* close the IPC client that's responsible for this invoker's RPCs */
// synchronized + isClosed make repeated or concurrent close() calls stop
// the client only once.
synchronized private void close() {
if (!isClosed) {
isClosed = true;
CLIENTS.stopClient(client);
}
}
Invoker的invoke方法把被调用的方法和参数封装成Invocation对象,然后调用org.apache.hadoop.ipc.Client中的call方法:
/**
 * Sends <code>param</code> to the IPC server identified by
 * <code>remoteId</code> and blocks until the matching call is marked done.
 *
 * @param param the request to send
 * @param remoteId identifies the server connection to use
 * @return the value recorded on the call when it completed without error
 * @throws IOException the remote exception itself if the server side failed,
 *         otherwise the local error wrapped together with the remote address
 */
public Writable call(Writable param, ConnectionId remoteId)
    throws InterruptedException, IOException {
  final Call pending = new Call(param);
  final Connection conn = getConnection(remoteId, pending);
  conn.sendParam(pending); // hand the request to the connection

  synchronized (pending) {
    // Keep waiting through interrupts, but remember that one happened.
    boolean wasInterrupted = false;
    while (!pending.done) {
      try {
        pending.wait(); // woken once the call has been completed
      } catch (InterruptedException ie) {
        wasInterrupted = true;
      }
    }
    if (wasInterrupted) {
      // Waiting is over, so restore the interrupt status for the caller.
      Thread.currentThread().interrupt();
    }

    if (pending.error == null) {
      return pending.value;
    }
    if (pending.error instanceof RemoteException) {
      pending.error.fillInStackTrace();
      throw pending.error;
    }
    // Local failure: report the connection's address, which reflects an IP
    // change, unlike the remoteId.
    throw wrapException(conn.getRemoteAddress(), pending.error);
  }
}
org.apache.hadoop.ipc.Client类分析:
Client主要完成的功能是发送远程过程调用信息并接收执行结果。
org.apache.hadoop.ipc.Client内部有两个重要的内部类,分别是Call和Connection。
Call类:该类封装了一个RPC请求,它包含五个成员变量,分别是唯一标识id、函数调用信息param、函数执行返回值value、出错或者异常信息error和执行完成标识符done。由于Hadoop RPC Server采用了异步方式处理客户端请求,这使得远程过程调用的发生顺序与结果返回顺序无直接关系,而Client端正是通过id识别不同的函数调用。当客户端向服务器端发送请求时,只需填充id和param两个变量,而剩下的三个变量(value、error和done)则在收到服务器端的响应后,根据函数的执行情况填充。
Connection类:Client与每个Server之间维护一个通信连接。该连接相关的基本信息及操作被封装到Connection类中。其中,基本信息主要包括:通信连接唯一标识(remoteId),与Server端通信的Socket(socket),网络输入数据流(in),网络输出数据流(out),保存RPC请求的哈希表(calls)等。