getBlockReaderLocal函数
这个函数用于短路读(HDFS-347)操作,它创建的是BlockReaderLocal类对象,代码如下:
/**
 * Tries to construct a BlockReaderLocal for a short-circuit read of the
 * current block.
 *
 * The domain socket path information is resolved lazily, then the
 * ShortCircuitCache is asked to fetch or create the replica information for
 * this block, with this object acting as the replica creator callback.
 *
 * @return a new BlockReaderLocal, or null if the path is not usable for
 *         short-circuit reads, the cache returned no replica information,
 *         or the replica information contains no replica.
 * @throws InvalidToken if the replica information carries an InvalidToken
 *         exception.
 */
private BlockReader getBlockReaderLocal() throws InvalidToken {
  if (LOG.isTraceEnabled()) {
    LOG.trace(this + ": trying to construct a BlockReaderLocal " +
        "for short-circuit reads.");
  }
  if (pathInfo == null) {
    // Resolve the domain socket path information (path plus its state)
    // the first time it is needed.
    pathInfo = clientContext.getDomainSocketFactory().
        getPathInfo(inetSocketAddress, conf);
  }
  // Give up early when this path cannot be used for short-circuit reads.
  if (!pathInfo.getPathState().getUsableForShortCircuit()) {
    PerformanceAdvisory.LOG.debug(this + ": " + pathInfo + " is not " +
        "usable for short circuit; giving up on BlockReaderLocal.");
    return null;
  }
  ShortCircuitCache cache = clientContext.getShortCircuitCache();
  // The cache key is built from the block id and the block pool id.
  ExtendedBlockId key =
      new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId());
  // Look the replica information up in the cache; "this" is handed over as
  // the creator callback used when nothing is cached yet.
  ShortCircuitReplicaInfo info = cache.fetchOrCreate(key, this);
  if (info == null) {
    // fetchOrCreate returns null when the cache has been closed; without
    // this check the dereference below would throw NullPointerException.
    if (LOG.isTraceEnabled()) {
      LOG.trace(this + ": no ShortCircuitReplicaInfo available for " + key +
          "; giving up on BlockReaderLocal.");
    }
    return null;
  }
  InvalidToken exc = info.getInvalidTokenException();
  if (exc != null) {
    if (LOG.isTraceEnabled()) {
      LOG.trace(this + ": got InvalidToken exception while trying to " +
          "construct BlockReaderLocal via " + pathInfo.getPath());
    }
    throw exc;
  }
  if (info.getReplica() == null) {
    // Emitted unguarded, like the advisory above: the previous guard tested
    // trace level on a different logger, which suppressed this debug message
    // unless trace logging happened to be enabled.
    PerformanceAdvisory.LOG.debug(this + ": failed to get " +
        "ShortCircuitReplica. Cannot construct " +
        "BlockReaderLocal via " + pathInfo.getPath());
    return null;
  }
  // A replica is available: build the local block reader around it.
  return new BlockReaderLocal.Builder(conf).
      setFilename(fileName).
      setBlock(block).
      setStartOffset(startOffset).
      setShortCircuitReplica(info.getReplica()).
      setVerifyChecksum(verifyChecksum).
      setCachingStrategy(cachingStrategy).
      setStorageType(storageType).
      build();
}
我们进入到cache.fetchOrCreate()看看,代码如下:
/**
 * Fetch the replica information cached for a block, or create it.
 *
 * The cache lock is acquired by this method for the lookup and released
 * again before the creator callback is invoked.
 *
 * If an entry for the key already exists, this method waits for it via
 * {@code fetch}. When {@code fetch} asks for a retry, the lookup is repeated
 * a bounded number of times; if no usable result was obtained, a new Waitable
 * is registered for the key and the replica information is created by the
 * calling thread.
 *
 * @param key      Key to use for lookup.
 * @param creator  Replica creator callback.  Will be called without
 *                 the cache lock being held.
 *
 * @return         Null if the cache is closed.  Otherwise the replica
 *                 information that was fetched or created.
 */
public ShortCircuitReplicaInfo fetchOrCreate(ExtendedBlockId key,
    ShortCircuitReplicaCreator creator) {
  // Upper bound on lookups of an existing entry.  The bound matters: an
  // interrupted thread keeps its interrupt flag set, so an unbounded retry
  // could spin forever.
  final int maxFetchAttempts = 3;
  Waitable<ShortCircuitReplicaInfo> newWaitable = null;
  lock.lock();
  try {
    ShortCircuitReplicaInfo info = null;
    // The previous "do { ... continue; } while (false)" form never retried,
    // because "continue" jumps to the (false) loop condition.
    for (int attempt = 0; attempt < maxFetchAttempts; attempt++) {
      if (closed) {
        if (LOG.isTraceEnabled()) {
          LOG.trace(this + ": can't fetchOrCreate " + key +
              " because the cache is closed.");
        }
        return null;
      }
      // Look up the entry registered for this block (block id + pool id).
      Waitable<ShortCircuitReplicaInfo> waitable = replicaInfoMap.get(key);
      if (waitable == null) {
        // Nothing registered for this key: we have to load it ourselves.
        break;
      }
      try {
        info = fetch(key, waitable);
        break;
      } catch (RetriableException e) {
        if (LOG.isDebugEnabled()) {
          LOG.debug(this + ": retrying " + e.getMessage());
        }
      }
    }
    if (info != null) return info;
    // We need to load the replica ourselves.  Register a Waitable first so
    // that other threads looking up the same key wait for our result.
    newWaitable = new Waitable<ShortCircuitReplicaInfo>(lock.newCondition());
    replicaInfoMap.put(key, newWaitable);
  } finally {
    lock.unlock();
  }
  // Create the replica information outside the lock.
  return create(key, creator, newWaitable);
}
我们开始分析fetch函数,看它是怎么通过key和waitable来获取缓存中的ShortCircuitReplicaInfo类对象的,代码如下:
/**
 * Wait for and return the ReplicaInfo object behind an existing map entry.
 *
 * A result that carries an InvalidToken exception, or that has no replica,
 * is returned as-is.  Otherwise the replica is checked for having been
 * purged or gone stale (both lead to a retry request) before a reference
 * to it is taken.
 *
 * @param key       The key that we're using.
 * @param waitable  The waitable object to wait on.
 * @return          The ReplicaInfo object delivered through the waitable.
 *
 * @throws RetriableException   If the wait was interrupted, or the replica
 *                              was purged or stale, and the caller needs
 *                              to retry.
 */
private ShortCircuitReplicaInfo fetch(ExtendedBlockId key,
    Waitable<ShortCircuitReplicaInfo> waitable) throws RetriableException {
  // An entry already exists for this key, so we wait on it instead of
  // loading the ShortCircuitReplica a second time.
  if (LOG.isTraceEnabled()) {
    LOG.trace(this + ": found waitable for " + key);
  }
  final ShortCircuitReplicaInfo loaded;
  try {
    loaded = waitable.await();
  } catch (InterruptedException e) {
    LOG.info(this + ": interrupted while waiting for " + key);
    // Restore the interrupt flag for code further up the stack.
    Thread.currentThread().interrupt();
    throw new RetriableException("interrupted");
  }

  final boolean tokenRejected = loaded.getInvalidTokenException() != null;
  if (tokenRejected) {
    LOG.warn(this + ": could not get " + key + " due to InvalidToken " +
        "exception.", loaded.getInvalidTokenException());
    return loaded;
  }

  final ShortCircuitReplica candidate = loaded.getReplica();
  if (candidate == null) {
    LOG.warn(this + ": failed to get " + key);
    return loaded;
  }

  if (candidate.purged) {
    // Replicas that were already purged from the cache are ignored.
    throw new RetriableException("Ignoring purged replica " +
        candidate + ". Retrying.");
  }

  if (candidate.isStale()) {
    // A stale replica is purged before asking the caller to retry.
    LOG.info(this + ": got stale replica " + candidate + ". Removing " +
        "this replica from the replicaInfoMap and retrying.");
    // Drops the cache's reference to the replica.  This may or may not
    // trigger a close.
    purge(candidate);
    throw new RetriableException("ignoring stale replica " + candidate);
  }

  // Take a reference on the replica on behalf of the caller.
  ref(candidate);
  return loaded;
}
fetch函数会一直等待(关于Java中Condition的详细描述,这里有一篇比较好的文章),直到该数据块对应的ShortCircuitReplicaInfo类对象创建好为止。下面我们就来分析一下fetchOrCreate函数中调用的create函数,代码如下:
/**
 * Load replica information through the creator callback and publish the
 * outcome to every thread waiting on the given Waitable.
 *
 * The callback runs without the cache lock; the lock is only taken
 * afterwards to update the cache state and deliver the result.
 *
 * @param key          The key of the block being loaded.
 * @param creator      Callback that produces the replica information.
 * @param newWaitable  The waitable registered for this key, through which
 *                     the result is delivered.
 * @return             The replica information; never null.  It holds no
 *                     replica when loading failed.
 */
private ShortCircuitReplicaInfo create(ExtendedBlockId key,
    ShortCircuitReplicaCreator creator,
    Waitable<ShortCircuitReplicaInfo> newWaitable) {
  ShortCircuitReplicaInfo result = null;
  try {
    if (LOG.isTraceEnabled()) {
      LOG.trace(this + ": loading " + key);
    }
    result = creator.createShortCircuitReplicaInfo();
  } catch (RuntimeException e) {
    LOG.warn(this + ": failed to load " + key, e);
  }
  if (result == null) {
    // Nothing was produced: fall back to an empty info object, which has
    // neither a replica nor an InvalidToken exception.
    result = new ShortCircuitReplicaInfo();
  }

  lock.lock();
  try {
    if (info_hasNoReplica(result)) {
      // Loading failed: drop our waitable from the replicaInfoMap, but only
      // if the map still points at it.
      if (replicaInfoMap.get(key) == newWaitable) {
        replicaInfoMap.remove(key);
      }
      if (result.getInvalidTokenException() == null) {
        LOG.warn(this + ": failed to load " + key);
      } else {
        LOG.warn(this + ": could not load " + key + " due to InvalidToken " +
            "exception.", result.getInvalidTokenException());
      }
    } else {
      if (LOG.isTraceEnabled()) {
        LOG.trace(this + ": successfully loaded " + result.getReplica());
      }
      // Loading succeeded, so make sure the cache cleaner thread is running.
      startCacheCleanerThreadIfNeeded();
      // Note: new ShortCircuitReplicas start with a refCount of 2,
      // indicating that both this cache and whoever requested the
      // creation of the replica hold a reference.  So the reference
      // count is not incremented here.
    }
    // Hand the result to the waiting threads.  They can only proceed once
    // the lock is released in the finally block below.
    newWaitable.provide(result);
  } finally {
    lock.unlock();
  }
  return result;
}

/** Tells whether the given replica information holds no replica. */
private static boolean info_hasNoReplica(ShortCircuitReplicaInfo info) {
  return info.getReplica() == null;
}
我们开始分析函数createShortCircuitReplicaInfo,这个函数用来创建ShortCircuitReplicaInfo类对象。由于getBlockReaderLocal把this作为creator传给了fetchOrCreate,这里实际调用的是getBlockReaderLocal所在类(即BlockReaderFactory,而不是BlockReader)中的createShortCircuitReplicaInfo函数,代码如下:
/**
 * Fetch a pair of short-circuit block descriptors from a local DataNode.
 *
 * If a creation callback is installed and returns a result, that result is
 * used directly.  Otherwise domain peers are tried one after another: a
 * shared memory slot is allocated, the file descriptors are requested over
 * the peer, and the peer is put back into the peer cache on success.
 *
 * An I/O error on a peer that came from the cache only closes that peer and
 * moves on to the next one.  An I/O error on a newly created peer closes
 * it, disables the domain socket path and gives up.
 *
 * @return    Null if we could not communicate with the datanode,
 *            a new ShortCircuitReplicaInfo object otherwise.
 *            ShortCircuitReplicaInfo objects may contain either an
 *            InvalidToken exception, or a ShortCircuitReplica object ready
 *            to use.
 */
@Override
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
  if (createShortCircuitReplicaInfoCallback != null) {
    ShortCircuitReplicaInfo info =
        createShortCircuitReplicaInfoCallback.createShortCircuitReplicaInfo();
    if (info != null) return info;
  }
  if (LOG.isTraceEnabled()) {
    LOG.trace(this + ": trying to create ShortCircuitReplicaInfo.");
  }
  BlockReaderPeer curPeer;
  while (true) {
    // Obtain the next domain peer to try; null means there is none left.
    curPeer = nextDomainPeer();
    if (curPeer == null) break;
    // Peers taken from the cache consume one of the remaining cache tries.
    if (curPeer.fromCache) remainingCacheTries--;
    DomainPeer peer = (DomainPeer)curPeer.peer;
    Slot slot = null;
    ShortCircuitCache cache = clientContext.getShortCircuitCache();
    try {
      MutableBoolean usedPeer = new MutableBoolean(false);
      // Allocate a shared memory slot for this block.
      slot = cache.allocShmSlot(datanode, peer, usedPeer,
          new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId()),
          clientName);
      if (usedPeer.booleanValue()) {
        if (LOG.isTraceEnabled()) {
          LOG.trace(this + ": allocShmSlot used up our previous socket " +
              peer.getDomainSocket() + ". Allocating a new one...");
        }
        curPeer = nextDomainPeer();
        if (curPeer == null) {
          // No replacement peer is available, so the slot allocated above
          // will never be used.  Free it here, as the I/O error path does,
          // instead of leaking it when leaving the loop.
          if (slot != null) {
            cache.freeSlot(slot);
          }
          break;
        }
        peer = (DomainPeer)curPeer.peer;
      }
      ShortCircuitReplicaInfo info = requestFileDescriptors(peer, slot);
      clientContext.getPeerCache().put(datanode, peer);
      return info;
    } catch (IOException e) {
      if (slot != null) {
        // Release the slot that was allocated for the failed attempt.
        cache.freeSlot(slot);
      }
      if (curPeer.fromCache) {
        // Handle an I/O error we got when using a cached socket.
        // These are considered less serious, because the socket may be stale.
        if (LOG.isDebugEnabled()) {
          LOG.debug(this + ": closing stale domain peer " + peer, e);
        }
        // Close the peer, then loop around to try the next one.
        IOUtils.cleanup(LOG, peer);
      } else {
        // Handle an I/O error we got when using a newly created socket.
        // The domain socket path is disabled in this case, to prevent
        // wasting more time on it.
        LOG.warn(this + ": I/O error requesting file descriptors. " +
            "Disabling domain socket " + peer.getDomainSocket(), e);
        IOUtils.cleanup(LOG, peer);
        clientContext.getDomainSocketFactory()
            .disableDomainSocketPath(pathInfo.getPath());
        return null;
      }
    }
  }
  return null;
}
下面是相关的流程图:
相关的类结构图如下:
到此函数getBlockReaderLocal就基本分析完了,后面有空的时候会对内存槽位的分配进行详解。