BlockReaderLocal类中read(ByteBuffer buf)函数
代码如下:
/**
 * Reads bytes from the block into {@code buf}.
 *
 * <p>First asks {@link #createNoChecksumContext()} whether checksum
 * verification may be skipped for this call. If it may, and zero readahead
 * was requested, the data is read without the bounce buffer; otherwise the
 * bounce-buffer path is used. When a no-checksum context was created it is
 * always released before returning, including on failure.
 *
 * <p>Per-call diagnostics are emitted at TRACE level only.
 *
 * @param buf the buffer to read into
 * @return the value produced by the selected read path (the visible read
 *         paths return the number of bytes read, or -1 when nothing was
 *         read at end of file)
 * @throws IOException if the underlying read fails
 */
@Override
public synchronized int read(ByteBuffer buf) throws IOException {
  // Decide whether checksum verification can be skipped for this call.
  boolean canSkipChecksum = createNoChecksumContext();
  try {
    // Built only when tracing is on; a non-null value doubles as the
    // "trace this call" flag so later messages never print "null: ...".
    String traceString = null;
    if (LOG.isTraceEnabled()) {
      traceString = new StringBuilder().
          append("read(").
          append("buf.remaining=").append(buf.remaining()).
          append(", block=").append(block).
          append(", filename=").append(filename).
          append(", canSkipChecksum=").append(canSkipChecksum).
          append(")").toString();
      LOG.trace(traceString + ": starting");
    }
    int nRead;
    try {
      if (canSkipChecksum && zeroReadaheadRequested) {
        // No checksum needed and no readahead wanted: read directly.
        nRead = readWithoutBounceBuffer(buf);
      } else {
        // Checksums required, or readahead enabled: go through the bounce
        // buffer.
        nRead = readWithBounceBuffer(buf, canSkipChecksum);
      }
    } catch (IOException e) {
      if (traceString != null) {
        LOG.trace(traceString + ": I/O error", e);
      }
      throw e;
    }
    if (traceString != null) {
      LOG.trace(traceString + ": returning " + nRead);
    }
    return nRead;
  } finally {
    // Skipping checksums means a no-checksum context was created above, so
    // release it here.
    if (canSkipChecksum) {
      releaseNoChecksumContext();
    }
  }
}
通过函数createNoChecksumContext()来判断是否可以跳过数据校验,该函数代码如下:
/**
 * Determines whether this read may skip checksum verification.
 *
 * @return {@code true} when checksum verification is disabled, when the
 *         replica is on transient storage, or when
 *         {@code replica.addNoChecksumAnchor()} returns {@code true};
 *         {@code false} otherwise
 */
private boolean createNoChecksumContext() {
  // Verification is switched off: nothing to check.
  if (!verifyChecksum) {
    return true;
  }
  final boolean onTransientStorage =
      storageType != null && storageType.isTransient();
  if (onTransientStorage) {
    // Checksums are not stored for replicas on transient storage. We do not
    // anchor, because we do not intend for client activity to block eviction
    // from transient storage on the DataNode side.
    return true;
  }
  // Otherwise the outcome depends on whether the replica grants an anchor.
  return replica.addNoChecksumAnchor();
}
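如果副本的存储类型是非持久化(transient)的,也就是说数据保存在DataNode的内存中,那么这种副本本身就没有保存校验和,因此读取时不需要做数据校验,直接返回true;否则需要调用replica.addNoChecksumAnchor()尝试添加免校验锚点,只有添加成功才可以跳过校验。另外,如果本来就不要求校验(verifyChecksum为false),同样返回true。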
在可以跳过数据校验以及不需要预读取的情况下我们调用readWithoutBounceBuffer函数,代码如下:
/**
 * Reads from {@code dataIn} straight into {@code buf}, bypassing the bounce
 * buffer. Any existing data and checksum buffers are released first.
 *
 * @param buf the buffer to read into
 * @return the number of bytes read, or -1 if nothing was read and
 *         {@code dataPos} equals the size of {@code dataIn}
 * @throws IOException if reading from {@code dataIn} fails
 */
private synchronized int readWithoutBounceBuffer(ByteBuffer buf)
    throws IOException {
  // Neither buffer is used on this path, so give both back.
  freeDataBufIfExists();
  freeChecksumBufIfExists();

  int bytesCopied = 0;
  int chunk;
  // Keep reading at dataPos until buf is full or a read yields nothing.
  while (buf.hasRemaining() && (chunk = dataIn.read(buf, dataPos)) > 0) {
    dataPos += chunk;
    bytesCopied += chunk;
  }

  // Report EOF only when no bytes were read and we sit at the end of dataIn.
  if (bytesCopied == 0 && dataPos == dataIn.size()) {
    return -1;
  }
  return bytesCopied;
}
freeDataBufIfExists函数用来回收dataBuffer所对应的空间,代码如下:
/**
 * Returns {@code dataBuf} to {@code bufferPool}, if one is currently held,
 * and rewinds {@code dataPos} by the number of bytes still unread in it.
 */
private synchronized void freeDataBufIfExists() {
  if (dataBuf == null) {
    return;
  }
  // When disposing of a dataBuf, we have to move our stored file index
  // backwards by whatever was still left in the buffer.
  dataPos -= dataBuf.remaining();
  // Reset position/limit before handing the buffer back to the pool.
  dataBuf.clear();
  bufferPool.returnBuffer(dataBuf);
  // Drop our reference so the buffer is no longer used from here.
  dataBuf = null;
}
freeChecksumBufIfExists函数与freeDataBufIfExists函数一样,用来回收checksumBuffer对应的空间。
readWithBounceBuffer函数的代码如下:
/**
 * Read using the bounce buffer.
 *
 * A 'direct' read actually has three phases. The first drains any
 * remaining bytes from the slow read buffer. After this the read is
 * guaranteed to be on a checksum chunk boundary. If there are still bytes
 * to read, the fast direct path is used for as many remaining bytes as
 * possible, up to a multiple of the checksum chunk size. Finally, any
 * 'odd' bytes remaining at the end of the read cause another slow read to
 * be issued, which involves an extra copy.
 *
 * Every 'slow' read tries to fill the slow read buffer in one go for
 * efficiency's sake. As described above, all non-checksum-chunk-aligned
 * reads will be served from the slower read path.
 *
 * @param buf The buffer to read into.
 * @param canSkipChecksum True if we can skip checksums.
 * @return The number of bytes placed into buf, or -1 if no data was seen
 *         and nothing was read.
 * @throws IOException If filling a buffer fails.
 */
private synchronized int readWithBounceBuffer(ByteBuffer buf,
    boolean canSkipChecksum) throws IOException {
  int copied = 0;
  // Phase one: hand over whatever is still sitting in the bounce buffer.
  int drained = drainDataBuf(buf);
  if (drained >= 0) {
    copied = drained;
    // The caller's buffer is already full; nothing more to do.
    if (!buf.hasRemaining()) {
      return copied;
    }
  }

  boolean sawData = false;
  boolean finished = false;
  do {
    // Fast lane requires a direct buffer with room for a full readahead
    // unit and a file position on a checksum chunk boundary.
    final boolean fastLane = buf.isDirect()
        && buf.remaining() >= maxReadaheadLength
        && (dataPos % bytesPerChecksum) == 0;
    if (fastLane) {
      // Fast lane: try to read directly into user-supplied buffer, bypassing
      // bounce buffer.
      final int savedLimit = buf.limit();
      int filled;
      try {
        // Expose exactly maxReadaheadLength bytes of space to fillBuffer.
        buf.limit(buf.position() + maxReadaheadLength);
        filled = fillBuffer(buf, canSkipChecksum);
      } finally {
        buf.limit(savedLimit);
      }
      // A short fill ends the loop.
      finished = filled < maxReadaheadLength;
      if (filled > 0) {
        sawData = true;
      }
      copied += filled;
    } else {
      // Slow lane: refill bounce buffer; a true result ends the loop.
      finished = fillDataBuf(canSkipChecksum);
      // Then copy from the bounce buffer into the caller's buffer.
      drained = drainDataBuf(buf);
      if (drained >= 0) {
        sawData = true;
        copied += drained;
      }
    }
  } while (!finished && buf.hasRemaining());

  if (!sawData && copied == 0) {
    return -1;
  }
  return copied;
}
drainDataBuf函数的代码如下:
//这里dataBuf是数据缓冲区,它的大小始终是校验块的整数倍,这样设计是为了进行校验操作时比较方便,能够以校验块为单位读取数据。
/**
 * Copies as many bytes as possible from {@code dataBuf} into {@code buf}.
 *
 * @param buf the destination buffer
 * @return the number of bytes copied; 0 if {@code dataBuf} still holds data
 *         but {@code buf} has no room; -1 if there is no {@code dataBuf} or
 *         it holds no remaining data
 */
private synchronized int drainDataBuf(ByteBuffer buf) {
  if (dataBuf == null) {
    return -1;
  }
  final int savedLimit = dataBuf.limit();
  final int available = dataBuf.remaining();
  final int toCopy = Math.min(available, buf.remaining());
  if (toCopy == 0) {
    // Either the bounce buffer is exhausted (-1) or the destination is
    // full (0).
    if (available == 0) {
      return -1;
    }
    return 0;
  }
  try {
    // Narrow the window so that put() transfers exactly toCopy bytes.
    dataBuf.limit(dataBuf.position() + toCopy);
    buf.put(dataBuf);
  } finally {
    // Restore the original limit of the bounce buffer.
    dataBuf.limit(savedLimit);
  }
  return toCopy;
}
fillBuffer函数的代码如下:
/**
 * Read from the block file into a buffer.
 *
 * This function overwrites checksumBuf. It will increment dataPos.
 *
 * @param buf The buffer to read into. May be dataBuf.
 *            The position and limit of this buffer should be set to
 *            multiples of the checksum size.
 * @param canSkipChecksum True if we can skip checksumming.
 *
 * @return Total bytes read. 0 on EOF.
 * @throws IOException If reading fails, or if the checksum file ends before
 *                     the needed checksums were read.
 */
private synchronized int fillBuffer(ByteBuffer buf, boolean canSkipChecksum)
    throws IOException {
  final long firstDataPos = dataPos;
  final int firstBufPos = buf.position();
  int bytesRead = 0;

  // Pull data from dataIn at dataPos until buf is full or a read returns a
  // negative count.
  while (buf.hasRemaining()) {
    final int n = dataIn.read(buf, dataPos);
    if (n < 0) {
      break;
    }
    dataPos += n;
    bytesRead += n;
  }

  if (canSkipChecksum) {
    // No verification on this path, so the checksum buffer is not needed.
    freeChecksumBufIfExists();
    return bytesRead;
  }
  if (bytesRead == 0) {
    // Nothing was read, so there is nothing to verify.
    return bytesRead;
  }

  try {
    // Restrict buf to exactly the bytes that were just read.
    buf.limit(buf.position());
    buf.position(firstBufPos);
    createChecksumBufIfNeeded();
    // One checksum per chunk, rounding up for a trailing partial chunk.
    final int chunkCount = (bytesRead + bytesPerChecksum - 1) / bytesPerChecksum;
    checksumBuf.clear();
    checksumBuf.limit(chunkCount * checksumSize);
    // Offset in the checksum file: the header, then one checksum entry for
    // every chunk that precedes firstDataPos.
    long metaPos = BlockMetadataHeader.getHeaderSize()
        + ((firstDataPos / bytesPerChecksum) * checksumSize);
    // Read until checksumBuf holds every checksum that is needed.
    while (checksumBuf.hasRemaining()) {
      final int n = checksumIn.read(checksumBuf, metaPos);
      if (n < 0) {
        throw new IOException("Got unexpected checksum file EOF at " +
            metaPos + ", block file position " + firstDataPos + " for " +
            "block " + block + " of file " + filename);
      }
      metaPos += n;
    }
    // Switch checksumBuf from being written to being read.
    checksumBuf.flip();
    // Verify the data just read against the checksums just read.
    checksum.verifyChunkedSums(buf, checksumBuf, filename, firstDataPos);
  } finally {
    // Leave the position just past the data that was read.
    buf.position(buf.limit());
  }
  return bytesRead;
}
由于readWithBounceBuffer函数比较复杂,所以我们来总结一下该函数的流程,如下图:
当然,根据图我们只能了解一个大概,想要真正理解还需要自己去阅读源代码。这里其实用到了对齐的方法,主要是围绕数据校验块来组织读取,从而方便进行数据校验。