Hadoop IPC类图如下
连接
// To improve communication efficiency, connections can be reused;
// ConnectionId is what distinguishes one connection from another.
class ConnectionId {
InetSocketAddress address; // address of the remote server
UserGroupInformation ticket; // user and user-group information
Class<?> protocol; // class object of the IPC interface
}
//ConnectionHeader类是客户端和服务端建立TCP连接之后交换的第一条消息,包含ConnectionId中的
//用户信息和IPC接口信息,用于确认用户是否有权限连接
ConnectionHeader
// Server-side connection object.
public class Connection {
private boolean rpcHeaderRead = false; // whether the RPC version number has been read
private boolean headerRead = false; // whether the connection header has been read
private SocketChannel channel;
private ByteBuffer data;
private ByteBuffer dataLengthBuffer;
private LinkedList<Call> responseQueue;
private volatile int rpcCount = 0; // number of RPCs currently being processed
private long lastContact;
private int dataLength;
private Socket socket;
// Cache the remote host & port info so that even if the socket is
// disconnected, we can say where it used to connect to.
private String hostAddress;
private int remotePort;
private InetAddress addr;
ConnectionHeader header = new ConnectionHeader();
Class<?> protocol;
boolean useSasl;
SaslServer saslServer;
private AuthMethod authMethod;
private boolean saslContextEstablished;
private boolean skipInitialSaslHandshake;
private ByteBuffer rpcHeaderBuffer;
private ByteBuffer unwrappedData;
private ByteBuffer unwrappedDataLengthBuffer;
UserGroupInformation user = null;
}
// Client-side connection.
private class Connection extends Thread {
private InetSocketAddress server; // address of the IPC server
private String serverPrincipal; // server's krb5 principal name
private ConnectionHeader header; // connection header
private final ConnectionId remoteId; // identifier of this IPC connection
private AuthMethod authMethod; // authentication method
private boolean useSasl;
private Token<? extends TokenIdentifier> token;
private SaslRpcClient saslRpcClient;
private Socket socket = null; // connected socket
private DataInputStream in;
private DataOutputStream out;
private int rpcTimeout;
private int maxIdleTime; //connections will be culled if it was idle for
//maxIdleTime msecs
private int maxRetries; //the max. no. of retries for socket connections
private boolean tcpNoDelay; // if T then disable Nagle's Algorithm
private int pingInterval; // how often sends ping to the server in msecs
// currently active calls
private Hashtable<Integer, Call> calls = new Hashtable<Integer, Call>();
private AtomicLong lastActivity = new AtomicLong();// last I/O activity time
private AtomicBoolean shouldCloseConnection = new AtomicBoolean(); // indicate if the connection is closed
private IOException closeException; // close reason
}
Call
// Client side: one outstanding call and its eventual outcome.
private class Call {
int id; // call id
Writable param; // parameter
Writable value; // value, null if error
IOException error; // exception, null if value
boolean done;
}
// Server side: one received call together with the connection it arrived on.
private static class Call {
private int id; // the client's call id
private Writable param; // the parameter passed
private Connection connection; // connection to client
private long timestamp;
}
//客户端和服务端通过各自的Call对象发送调用
客户端还有ParallelCall,用于同时发送多个远程IPC调用
服务端处理
// Thread that handles listen (accept) events.
class Listener extends Thread {
  // Opens the ServerSocketChannel, starts the Reader workers and
  // finally registers the channel for ACCEPT events.
  public Listener() {
    acceptChannel = ServerSocketChannel.open();
    acceptChannel.configureBlocking(false);
    // Bind the server socket to the local host and port.
    bind(acceptChannel.socket(), address, backlogLength);
    // The resulting port could be an ephemeral one.
    port = acceptChannel.socket().getLocalPort();
    // Selector used for accept events.
    selector = Selector.open();
    readers = new Reader[readThreads];
    readPool = Executors.newFixedThreadPool(readThreads);
    // One Reader per read thread, each with its own selector.
    int started = 0;
    while (started < readThreads) {
      Reader worker = new Reader(Selector.open());
      readers[started] = worker;
      readPool.execute(worker);
      started++;
    }
    acceptChannel.register(selector, SelectionKey.OP_ACCEPT);
  }

  // Handles ACCEPT events: every selected key that is still valid and
  // acceptable is passed to doAccept.
  public void run() {
    selector.select();
    for (Iterator<SelectionKey> ready = selector.selectedKeys().iterator(); ready.hasNext(); ) {
      key = ready.next();
      // Drop the key from the selected set before handling it.
      ready.remove();
      if (key.isValid() && key.isAcceptable()) {
        doAccept(key);
      }
    }
  }
}
// Reader task: handles read events; per the original notes the work is then
// handed over to Handler threads (that hand-off is not shown in this excerpt).
class Reader implements Runnable {
  public void run() {
    readSelector.select();
    // Hold off while the `adding` flag is set, re-checking about once a second.
    // NOTE(review): Object.wait must be called while holding this object's
    // monitor; the enclosing synchronized block is presumably elided here.
    while (adding) {
      this.wait(1000);
    }
    for (Iterator<SelectionKey> ready = readSelector.selectedKeys().iterator(); ready.hasNext(); ) {
      key = ready.next();
      // Drop the key from the selected set before handling it.
      ready.remove();
      if (key.isValid() && key.isReadable()) {
        doRead(key);
      }
    }
  }
}
// Handles write events asynchronously.
class Responder extends Thread {
  public void run() {
    // If a channel is being registered, wait.
    waitPending();
    writeSelector.select(PURGE_INTERVAL);

    // Pass 1: flush every key that is ready for writing.
    for (Iterator<SelectionKey> ready = writeSelector.selectedKeys().iterator(); ready.hasNext(); ) {
      SelectionKey selected = ready.next();
      ready.remove();
      if (!selected.isValid() || !selected.isWritable()) {
        continue;
      }
      doAsyncWrite(selected);
    }

    // Pass 2: under the key-set lock, collect the calls attached to registered
    // keys whose channel is still the call's connection channel. What is done
    // with `calls` afterwards is not shown in this excerpt.
    synchronized (writeSelector.keys()) {
      calls = new ArrayList<Call>(writeSelector.keys().size());
      for (SelectionKey registered : writeSelector.keys()) {
        Call pending = (Call) registered.attachment();
        if (pending == null || registered.channel() != pending.connection.channel) {
          continue;
        }
        calls.add(pending);
      }
    }
  }
}
// Asynchronous write path: Responder.run invokes this for each valid, writable key.
// It locks the connection's response queue and processes it with inHandler = false.
// NOTE(review): `call` is not declared in this excerpt; presumably it is taken from
// key.attachment() in the full source -- confirm.
void doAsyncWrite(SelectionKey key) {
synchronized(call.connection.responseQueue) {
processResponse(call.connection.responseQueue, false);
}
}
// inHandler tells whether the write is issued directly from a Handler
// or is an asynchronous write performed by the Responder thread.
// Takes the first call off the queue and writes `buffer` to that call's channel:
// directly when the remaining bytes fit within NIO_BUFFER_LIMIT, via channelIO otherwise.
void processResponse(LinkedList<Call> responseQueue, boolean inHandler) {
  call = responseQueue.removeFirst();
  SocketChannel target = call.connection.channel;
  int count;
  if (buffer.remaining() > NIO_BUFFER_LIMIT) {
    count = channelIO(null, target, buffer);
  } else {
    count = target.write(buffer);
  }
}
// Runs readAndProcess on the Connection attached to the given key and
// stores its result in `count`.
void doRead(SelectionKey key) {
  count = ((Connection) key.attachment()).readAndProcess();
}