Client端读文件流程
创建DFSInputStream
具体流程
代码分析:
FileSystem.get方法获取DistributedFileSystem对象
// Obtain the FileSystem instance that matches the supplied configuration.
fileSystem = FileSystem.get(configuration);
// Print the concrete runtime class of the returned FileSystem.
System.out.println(fileSystem.getClass());
输出
//根据configuration传入的参数解析出URI的scheme后,获取到hdfs对应的文件系统DistributedFileSystem,具体流程不再展示
class org.apache.hadoop.hdfs.DistributedFileSystem
DistributedFileSystem.open获取一个FSDataInputStream流对象
// Open the file for reading and keep the returned FSDataInputStream.
// NOTE(review): the empty path string is presumably a placeholder for the real HDFS path - confirm.
FSDataInputStream open = fileSystem.open(new Path(""));
DistributedFileSystem.open方法:
/**
 * Opens the file at {@code f} and returns an {@link FSDataInputStream} for it.
 *
 * <p>The work is delegated to an anonymous {@code FileSystemLinkResolver}
 * whose {@code resolve} method is invoked with this file system and
 * {@code absF}. The "..." line marks statements omitted from this excerpt
 * (presumably including the definition of {@code absF}).
 *
 * @param f path of the file to open
 * @param bufferSize buffer size forwarded to the underlying open calls
 * @return the stream produced by the resolver
 * @throws IOException declared by this method and by both resolver callbacks
 */
public FSDataInputStream open(Path f, final int bufferSize) throws IOException {
...
return (FSDataInputStream)(new FileSystemLinkResolver<FSDataInputStream>() {
// Overridden doCall method
public FSDataInputStream doCall(Path p) throws IOException {
// Calls DFSClient.open, which returns a DFSInputStream
DFSInputStream dfsis = DistributedFileSystem.this.dfs.open(DistributedFileSystem.this.getPathName(p), bufferSize, DistributedFileSystem.this.verifyChecksum);
// Wrap the DFSInputStream; per the original notes the wrapper is an HdfsDataInputStream
return DistributedFileSystem.this.dfs.createWrappedInputStream(dfsis);
}
// Overridden next method: simply opens the path on the file system passed in
public FSDataInputStream next(FileSystem fs, Path p) throws IOException {
return fs.open(p, bufferSize);
}
}).resolve(this, absF);
}
DFSClient.open方法:
/**
 * Opens {@code src} for reading and returns the stream built by
 * {@code openInternal}.
 *
 * @param src path of the file to open
 * @param buffersize not used by this method body
 * @param verifyChecksum forwarded unchanged to {@code openInternal}
 * @return the input stream created for {@code src}
 * @throws IOException if any of the delegated calls fails
 */
public DFSInputStream open(String src, int buffersize, boolean verifyChecksum)
    throws IOException {
  checkOpen();
  // The trace scope is held only so that it is closed when the lookup finishes.
  try (TraceScope unusedScope = newPathTraceScope("newDFSInputStream", src)) {
    // Fetch the block locations starting at offset 0; per the original notes
    // this step involves an RPC to the NameNode.
    final LocatedBlocks blocks = getLocatedBlocks(src, 0);
    return openInternal(blocks, src, verifyChecksum);
  }
}
openInternal方法:
/**
 * Builds the input stream for {@code src} from already-fetched block
 * locations.
 *
 * @param locatedBlocks block locations of the file; {@code null} is rejected
 * @param src path of the file being opened
 * @param verifyChecksum forwarded to the stream constructor
 * @return a {@code DFSStripedInputStream} when the blocks carry an erasure
 *     coding policy, otherwise a plain {@code DFSInputStream}
 * @throws IOException if {@code locatedBlocks} is {@code null}
 */
private DFSInputStream openInternal(LocatedBlocks locatedBlocks, String src,
    boolean verifyChecksum) throws IOException {
  if (locatedBlocks == null) {
    throw new IOException("Cannot open filename " + src);
  }
  // A non-null erasure coding policy selects the striped stream.
  final ErasureCodingPolicy policy = locatedBlocks.getErasureCodingPolicy();
  return policy == null
      ? new DFSInputStream(this, src, verifyChecksum, locatedBlocks)
      : new DFSStripedInputStream(this, src, verifyChecksum, policy,
          locatedBlocks);
}
DFSInputStream构造方法:
/**
 * Creates an input stream over {@code src} using block locations that were
 * fetched beforehand, then calls {@code openInfo(false)}.
 *
 * @param dfsClient client that owns this stream
 * @param src path of the file being read
 * @param verifyChecksum stored on the stream for later use
 * @param locatedBlocks block locations of the file
 * @throws IOException if {@code openInfo} fails
 */
DFSInputStream(DFSClient dfsClient, String src, boolean verifyChecksum,
    LocatedBlocks locatedBlocks) throws IOException {
  this.dfsClient = dfsClient;
  this.verifyChecksum = verifyChecksum;
  this.src = src;
  synchronized (infoLock) {
    // Caching strategy. Per the original notes, the client default is built as
    //   new CachingStrategy(readDropBehind, readahead)
    // where
    //   readDropBehind: dfs.client.cache.drop.behind.reads
    //   readahead:      dfs.client.cache.readahead
    // (The original block comment ended with "* /", which does not close the
    // comment and swallowed the assignment below.)
    this.cachingStrategy = dfsClient.getDefaultReadCachingStrategy();
  }
  this.locatedBlocks = locatedBlocks;
  openInfo(false);
}
openInfo方法:
/**
 * Determines the length of the last block, retrying while it is reported
 * as -1.
 *
 * @param refreshLocatedBlocks forwarded to the first
 *     {@code fetchLocatedBlocksAndGetLastBlockLength} call; every retry
 *     passes {@code true}
 * @throws IOException if the length is still -1 once the configured number
 *     of retries has been used up
 */
void openInfo(boolean refreshLocatedBlocks) throws IOException {
  final DfsClientConf conf = dfsClient.getConf();
  synchronized (infoLock) {
    lastBlockBeingWrittenLength =
        fetchLocatedBlocksAndGetLastBlockLength(refreshLocatedBlocks);
    int attemptsLeft = conf.getRetryTimesForGetLastBlockLength();
    // A length of -1 is a special case: after a cluster restart the
    // DataNodes may not have reported yet, so the NameNode cannot provide
    // the locations needed to compute the length. Wait and ask again, up to
    // the configured number of times.
    for (; attemptsLeft > 0 && lastBlockBeingWrittenLength == -1;
        attemptsLeft--) {
      DFSClient.LOG.warn("Last block locations not available. "
          + "Datanodes might not have reported blocks completely."
          + " Will retry for " + attemptsLeft + " times");
      waitFor(conf.getRetryIntervalForGetLastBlockLength());
      lastBlockBeingWrittenLength =
          fetchLocatedBlocksAndGetLastBlockLength(true);
    }
    if (attemptsLeft == 0 && lastBlockBeingWrittenLength == -1) {
      throw new IOException("Could not obtain the last block locations.");
    }
  }
}
DFSInputStream对象中包含要读取的文件的所有的block信息
LocatedBlocks信息:
///zookeeper-3.4.12/LICENSE.txt文件,11.66KB
LocatedBlocks{
;
fileLength=11938;
underConstruction=false;
blocks=[
LocatedBlock{
BP-1490162904-10.180.249.219-1595812228535:blk_1073743154_2331;
getBlockSize()=11938;
corrupt=false;
offset=0;
locs=[
DatanodeInfoWithStorage[
10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK
]
]
}
];
lastLocatedBlock=LocatedBlock{
BP-1490162904-10.180.249.219-1595812228535:blk_1073743154_2331;
getBlockSize()=11938;
corrupt=false;
offset=0;
locs=[
DatanodeInfoWithStorage[
10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK
]
]
};
isLastBlockComplete=true;
ecPolicy=null
}
对应的块文件在Linux的存储:
// 真实的文件内容
-rw-r--r--. 1 root root 11938 Aug 27 14:26 blk_1073743154
// 文件信息
-rw-r--r--. 1 root root 103 Aug 27 14:26 blk_1073743154_2331.meta
// centos的镜像文件,大小8.1G
LocatedBlocks{
;
fileLength=8694792192;
underConstruction=false;
blocks=[
LocatedBlock{
BP-1490162904-10.180.249.219-1595812228535:blk_1073742959_2136; getBlockSize()=134217728; corrupt=false; offset=0; locs=[DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK], DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK], DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK]]
},
LocatedBlock{
BP-1490162904-10.180.249.219-1595812228535:blk_1073742960_2137; getBlockSize()=134217728; corrupt=false; offset=134217728; locs=[DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK], DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK], DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK]]
},
LocatedBlock{
BP-1490162904-10.180.249.219-1595812228535:blk_1073742961_2138; getBlockSize()=134217728; corrupt=false; offset=268435456; locs=[DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK], DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK], DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK]]
},
LocatedBlock{
BP-1490162904-10.180.249.219-1595812228535:blk_1073742962_2139; getBlockSize()=134217728; corrupt=false; offset=402653184; locs=[DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK], DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK], DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK]]
},
LocatedBlock{
BP-1490162904-10.180.249.219-1595812228535:blk_1073742963_2140; getBlockSize()=134217728; corrupt=false; offset=536870912; locs=[DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK], DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK], DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK]]
},
LocatedBlock{
BP-1490162904-10.180.249.219-1595812228535:blk_1073742964_2141; getBlockSize()=134217728; corrupt=false; offset=671088640; locs=[DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK], DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK], DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK]]
},
LocatedBlock{
BP-1490162904-10.180.249.219-1595812228535:blk_1073742965_2142; getBlockSize()=134217728; corrupt=false; offset=805306368; locs=[DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK], DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK], DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK]]
},
LocatedBlock{
BP-1490162904-10.180.249.219-1595812228535:blk_1073742966_2143; getBlockSize()=134217728; corrupt=false; offset=939524096; locs=[DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK], DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK], DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK]]
},
LocatedBlock{
BP-1490162904-10.180.249.219-1595812228535:blk_1073742967_2144; getBlockSize()=134217728; corrupt=false; offset=1073741824; locs=[DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK], DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK], DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK]]
},
LocatedBlock{
BP-1490162904-10.180.249.219-1595812228535:blk_1073742968_2145; getBlockSize()=134217728; corrupt=false; offset=1207959552; locs=[DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK], DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK], DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK]]
}
];
lastLocatedBlock=LocatedBlock{BP-1490162904-10.180.249.219-1595812228535:blk_1073743023_2200; getBlockSize()=104857600;
corrupt=false;
offset=8589934592;
locs=[DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK], DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK], DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK]]};
isLastBlockComplete=true;
ecPolicy=null}
到此已经获取到了要读取文件的所有块信息,开始read流程
DFSInputStream的read流程
DFSInputStream.read方法:
/**
 * Reads into {@code buf} by wrapping the target range in a
 * {@code ByteArrayStrategy} and delegating to {@code readWithStrategy}.
 * The "..." line marks statements omitted from this excerpt.
 *
 * @param buf destination byte array
 * @param off offset passed to the strategy together with {@code buf}
 * @param len length passed to the strategy
 * @return whatever {@code readWithStrategy} returns
 * @throws IOException if the delegated read fails
 */
public synchronized int read(@Nonnull final byte buf[], int off, int len)
throws IOException {
...
// Strategy object that carries the destination array, offset and length.
ReaderStrategy byteArrayReader =
new ByteArrayStrategy(buf, off, len, readStatistics, dfsClient);
return readWithStrategy(byteArrayReader);
}
DFSInputStream.readWithStrategy方法:
/**
 * Reads data through the given strategy, starting at the current position
 * {@code pos}. This is an abridged excerpt: each "..." line marks omitted
 * statements, so the braces shown here do not balance.
 *
 * @param strategy supplies the target length and is handed to
 *     {@code readBuffer}
 * @return the value returned by {@code readBuffer}, or -1 on the final
 *     fall-through path shown here
 * @throws IOException if {@code readBuffer} returns a negative value
 *     ("Unexpected EOS from the reader") or a delegated call fails
 */
protected synchronized int readWithStrategy(ReaderStrategy strategy)
throws IOException {
// Length of data to read in this call
int len = strategy.getTargetLength();
...
// Per the original notes: pos starts at 0 and getFileLength returns the file's total size in bytes
if (pos < getFileLength()) {
int retries = 2;
while (retries > 0) {
try {
...
// Per the original notes: blockEnd is initially -1
if (pos > blockEnd || currentNode == null) {
// Locate the block that contains position pos; the returned node becomes currentNode
currentNode = blockSeekTo(pos);
}
// Never read past the end of the current block
int realLen = (int) Math.min(len, (blockEnd - pos + 1L));
synchronized(infoLock) {
// When the last block is complete, also cap the read at the file length
if (locatedBlocks.isLastBlockComplete()) {
realLen = (int) Math.min(realLen,
locatedBlocks.getFileLength() - pos);
}
}
int result = readBuffer(strategy, realLen, corruptedBlocks);
if (result >= 0) {
// Advance the stream position by the number of bytes read
pos += result;
} else {
// got a EOS from reader though we expect more data on it.
throw new IOException("Unexpected EOS from the reader");
}
return result;
}
...
return -1;
}
DFSInputStream.blockSeekTo方法:
/**
 * Positions the stream at {@code target}: picks the block containing it,
 * chooses a DataNode for that block and creates a block reader.
 * This is an abridged excerpt; each "..." line marks omitted statements.
 *
 * @param target file position to read from; per the original notes it is 0
 *     on the first read
 * @return the DataNode chosen for the block
 * @throws IOException if {@code target} is at or beyond the file length, or
 *     a delegated call fails
 */
private synchronized DatanodeInfo blockSeekTo(long target)
throws IOException {
// Reject positions at or beyond the end of the file
if (target >= getFileLength()) {
throw new IOException("Attempted to read past end of file");
}
...
while (true) {
//
// Compute desired block
// Get the block that contains the target position
LocatedBlock targetBlock = getBlockAt(target);
// update current position
this.pos = target;
// Last file offset covered by the chosen block
this.blockEnd = targetBlock.getStartOffset() +
targetBlock.getBlockSize() - 1;
this.currentLocatedBlock = targetBlock;
// Position of target relative to the start of the block
long offsetIntoBlock = target - targetBlock.getStartOffset();
// Choose a DataNode for this block
DNAddrPair retval = chooseDataNode(targetBlock, null);
chosenNode = retval.info;
InetSocketAddress targetAddr = retval.addr;
StorageType storageType = retval.storageType;
// Latest block if refreshed by chooseDatanode()
targetBlock = retval.block;
try {
// Obtain a suitable reader covering the rest of the block from offsetIntoBlock
blockReader = getBlockReader(targetBlock, offsetIntoBlock,
targetBlock.getBlockSize() - offsetIntoBlock, targetAddr,
storageType, chosenNode);
if(connectFailedOnce) {
DFSClient.LOG.info("Successfully connected to " + targetAddr +
" for " + targetBlock.getBlock());
}
return chosenNode;
}
...
}
选择block
DFSInputStream.getBlockAt方法:
// offset:从offset位置开始读取文件内容
protected LocatedBlock getBlockAt(long offset) throws IOException {
synchronized(infoLock) {
DFSInputStream.getBlockAt源码:
// check offset
if (offset < 0 || offset >= getFileLength()) {
  throw new IOException("offset < 0 || offset >= getFileLength(), offset=" + offset + ", locatedBlocks=" + locatedBlocks);
} else if (offset >= locatedBlocks.getFileLength()) {
  // offset to the portion of the last block,
  // which is not known to the name-node yet;
  // getting the last block 直接获取最后一个块
  blk = locatedBlocks.getLastLocatedBlock();
} else {
  // search cached blocks first
  // 根据offset寻找指定的块
  blk = fetchBlockAt(offset, 0, true);
}
DFSInputStream.getBlockAt字节码文件:
if (offset >= 0L && offset < this.getFileLength()) {
LocatedBlock blk;
if (offset >= this.locatedBlocks.getFileLength()) {
blk = this.locatedBlocks.getLastLocatedBlock();
} else {
blk = this.fetchBlockAt(offset, 0L, true);
}
return blk;
} else {
throw new IOException("offset < 0 || offset >= getFileLength(), offset=" + offset + ", locatedBlocks=" + this.locatedBlocks);
}
return blk;
}
}
DFSInputStream.fetchBlockAt方法:
/**
 * Returns the block containing {@code offset}, fetching block locations
 * through {@code dfsClient} when the block is not already held in
 * {@code locatedBlocks} or when the caller disallows the cache.
 *
 * @param offset file offset to look up
 * @param length length of the range to fetch; 0 selects the two-argument
 *     {@code getLocatedBlocks} overload
 * @param useCache whether an already-held block may be returned without
 *     fetching
 * @return the located block at the resolved index
 * @throws IOException an {@code EOFException} if the fetch returns
 *     {@code null} or no blocks; otherwise whatever the fetch throws
 */
private LocatedBlock fetchBlockAt(long offset, long length, boolean useCache)
    throws IOException {
  synchronized (infoLock) {
    int blockIndex = locatedBlocks.findBlock(offset);
    boolean mustFetch = !useCache;
    if (blockIndex < 0) {
      // Block is not cached: turn the negative result into an insert index
      // and force a fetch.
      blockIndex = LocatedBlocks.getInsertIndex(blockIndex);
      mustFetch = true;
    }
    if (mustFetch) {
      final LocatedBlocks fetched;
      if (length == 0) {
        fetched = dfsClient.getLocatedBlocks(src, offset);
      } else {
        fetched = dfsClient.getLocatedBlocks(src, offset, length);
      }
      if (fetched == null || fetched.locatedBlockCount() == 0) {
        throw new EOFException("Could not find target position " + offset);
      }
      locatedBlocks.insertRange(blockIndex, fetched.getLocatedBlocks());
    }
    return locatedBlocks.get(blockIndex);
  }
}
我这里是第一次读取,所以调试结果显示是从10个block中根据offset选出第一个block
选出的block信息:
LocatedBlock{BP-1490162904-10.180.249.219-1595812228535:blk_1073742959_2136; getBlockSize()=134217728; corrupt=false; offset=0; locs=[DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK], DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK], DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK]]}
选择datanode
DFSInputStream.chooseDataNode方法:
/**
 * Picks a DataNode for {@code block}, optionally refetching the block's
 * locations and trying again while no node is available.
 *
 * @param block block to read
 * @param ignoredNodes nodes passed through to the selection and refetch calls
 * @param refetchIfRequired when {@code true}, keep refetching locations
 *     until a node is found; when {@code false}, give up after one attempt
 * @return the chosen node/address pair, or {@code null} if none was found
 *     and refetching is disabled
 * @throws IOException if refetching the locations fails
 */
private DNAddrPair chooseDataNode(LocatedBlock block,
    Collection<DatanodeInfo> ignoredNodes, boolean refetchIfRequired)
    throws IOException {
  LocatedBlock candidate = block;
  for (;;) {
    final DNAddrPair best = getBestNodeDNAddrPair(candidate, ignoredNodes);
    // Either a node was found, or the caller does not want a refetch
    // (in which case best is null and null is returned).
    if (best != null || !refetchIfRequired) {
      return best;
    }
    candidate = refetchLocations(candidate, ignoredNodes);
  }
}
DFSInputStream.getBestNodeDNAddrPair方法:
/**
 * Chooses a DataNode from the block's location list and returns it with its
 * socket address. This is an abridged excerpt; each "..." line marks
 * omitted statements.
 *
 * @param block block whose locations are examined
 * @param ignoredNodes nodes to skip; may be {@code null}
 * @return a {@code DNAddrPair} built from the chosen node, its address, the
 *     storage type and the block
 */
protected DNAddrPair getBestNodeDNAddrPair(LocatedBlock block,
Collection<DatanodeInfo> ignoredNodes) {
// Get the block's locations. Example from the original notes: for the 8.1G CentOS image stored
// with three replicas, the nodes of the first block are:
// DatanodeInfoWithStorage[10.180.249.215:9866,DS-71345a2c-d308-478f-82f1-b8d276262f7d,DISK]
// DatanodeInfoWithStorage[10.180.249.226:9866,DS-d822e8bc-c946-4b93-8208-597b08c06b94,DISK]
// DatanodeInfoWithStorage[10.180.249.225:9866,DS-3d27fa2a-8c66-4774-9092-ab75f267355f,DISK]
// "block LocatedBlock, containing nodes in priority order": the nodes in the block are already in the preferred order
DatanodeInfo[] nodes = block.getLocations();
// Storage types of the locations (DISK in the example above)
StorageType[] storageTypes = block.getStorageTypes();
DatanodeInfo chosenNode = null;
StorageType storageType = null;
if (nodes != null) {
for (int i = 0; i < nodes.length; i++) {
// Pick a DataNode
/**
 * deadNodes: ConcurrentHashMap (per the original notes)
 * Put chosen node into dead list, continue
 * addToDeadNodes(chosenNode);
 */
// Take the first node that is neither in deadNodes nor in ignoredNodes
if (!deadNodes.containsKey(nodes[i])
&& (ignoredNodes == null || !ignoredNodes.contains(nodes[i]))) {
chosenNode = nodes[i];
...
}
...
// Address used for the socket transfer to the chosen node
final String dnAddr =
chosenNode.getXferAddr(dfsClient.getConf().isConnectToDnViaHostname());
DFSClient.LOG.debug("Connecting to datanode {}", dnAddr);
InetSocketAddress targetAddr = NetUtils.createSocketAddr(dnAddr);
return new DNAddrPair(chosenNode, targetAddr, storageType, block);
}
选择reader
DFSInputStream.getBlockReader方法:
/**
 * Creates a {@code BlockReader} for part of {@code targetBlock} through a
 * {@code BlockReaderFactory}.
 *
 * @param targetBlock block to read; supplies the extended block and the
 *     block token
 * @param offsetInBlock start offset inside the block
 * @param length number of bytes the reader should cover
 * @param targetAddr socket address of the chosen DataNode
 * @param storageType storage type of the chosen replica
 * @param datanode the chosen DataNode
 * @return the reader produced by the factory's {@code build()}
 * @throws IOException if building the reader fails
 */
protected BlockReader getBlockReader(LocatedBlock targetBlock,
    long offsetInBlock, long length, InetSocketAddress targetAddr,
    StorageType storageType, DatanodeInfo datanode) throws IOException {
  final ExtendedBlock extendedBlock = targetBlock.getBlock();
  final Token<BlockTokenIdentifier> token = targetBlock.getBlockToken();

  // Both values are read while holding infoLock and used after releasing it.
  final CachingStrategy strategySnapshot;
  final boolean scForbidden;
  synchronized (infoLock) {
    strategySnapshot = cachingStrategy;
    scForbidden = shortCircuitForbidden();
  }

  return new BlockReaderFactory(dfsClient.getConf())
      .setInetSocketAddress(targetAddr)
      .setRemotePeerFactory(dfsClient)
      .setDatanodeInfo(datanode)
      .setStorageType(storageType)
      .setFileName(src)
      .setBlock(extendedBlock)
      .setBlockToken(token)
      .setStartOffset(offsetInBlock)
      .setVerifyChecksum(verifyChecksum)
      .setClientName(dfsClient.clientName)
      .setLength(length)
      .setCachingStrategy(strategySnapshot)
      .setAllowShortCircuitLocalReads(!scForbidden)
      .setClientCacheContext(dfsClient.getClientContext())
      .setUserGroupInformation(dfsClient.ugi)
      .setConfiguration(dfsClient.getConfiguration())
      .build();
}
build方法:
/**
 * Builds the reader that fits the current situation, trying the options in
 * this order: external block reader, short-circuit local reader, UNIX domain
 * socket reader, and finally a TCP reader. The "..." line marks statements
 * omitted from this excerpt.
 *
 * Keywords from the original notes:
 *   short-circuit block readers
 *   UNIX domain sockets
 *   ShortCircuitCache: file descriptor
 *   DomainSocketFactory: UNIX domain socket paths
 *   PeerCache: have used in the past
 *
 * @return the first reader that could be created
 * @throws IOException if creating the external or the TCP reader fails
 */
public BlockReader build() throws IOException {
...
// Per the original notes: obtains an ExternalBlockReader via reflection from the DfsClientConf
// configuration; that reader uses a pluggable ReplicaAccessor to read data from the replica
BlockReader reader = tryToCreateExternalBlockReader();
if (reader != null) {
return reader;
}
final ShortCircuitConf scConf = conf.getShortCircuitConf();
// First try: short-circuit block readers
try {
if (scConf.isShortCircuitLocalReads() && allowShortCircuitLocalReads) {
if (clientContext.getUseLegacyBlockReaderLocal()) {
reader = getLegacyBlockReaderLocal();
if (reader != null) {
LOG.trace("{}: returning new legacy block reader local.", this);
return reader;
}
} else {
reader = getBlockReaderLocal();
if (reader != null) {
LOG.trace("{}: returning new block reader local.", this);
return reader;
}
}
}
// Then try: UNIX domain sockets
if (scConf.isDomainSocketDataTraffic()) {
reader = getRemoteBlockReaderFromDomain();
if (reader != null) {
LOG.trace("{}: returning new remote block reader using UNIX domain "
+ "socket on {}", this, pathInfo.getPath());
return reader;
}
}
} catch (IOException e) {
// A failure of the non-TCP attempts is only logged; the TCP path below is still tried
LOG.debug("Block read failed. Getting remote block reader using TCP", e);
}
Preconditions.checkState(!DFSInputStream.tcpReadsDisabledForTesting,
"TCP reads were disabled for testing, but we failed to " +
"do a non-TCP read.");
// Finally: TCP sockets
return getRemoteBlockReaderFromTcp();
}
TCP sockets的reader类:BlockReaderRemote类
/**
 * Sends a read-block request over {@code peer}, validates the response and
 * returns a {@code BlockReaderRemote} for the requested range.
 *
 * @param file file name, used in error messages and passed to the reader
 * @param block block to read
 * @param blockToken access token sent with the request
 * @param startOffset first requested byte inside the block
 * @param len number of bytes requested
 * @param verifyChecksum sent with the request and passed to the reader
 * @param clientName client name sent with the request
 * @param peer connection whose streams carry the request and the response
 * @param datanodeID passed to the reader
 * @param peerCache passed to the reader
 * @param cachingStrategy caching strategy sent with the request
 * @param networkDistance passed to the reader
 * @return the new remote block reader
 * @throws IOException if the first chunk offset in the response is outside
 *     the accepted range, or any I/O or validation call fails
 */
public static BlockReader newBlockReader(String file,
    ExtendedBlock block,
    Token<BlockTokenIdentifier> blockToken,
    long startOffset, long len,
    boolean verifyChecksum,
    String clientName,
    Peer peer, DatanodeID datanodeID,
    PeerCache peerCache,
    CachingStrategy cachingStrategy,
    int networkDistance) throws IOException {
  // The in and out streams are closed when the socket is closed (by the
  // caller), so they are deliberately not closed here.
  // Step 1: send the read-block request header on the peer's output stream.
  final BufferedOutputStream bufferedOut =
      new BufferedOutputStream(peer.getOutputStream());
  final DataOutputStream out = new DataOutputStream(bufferedOut);
  final Sender sender = new Sender(out);
  sender.readBlock(block, blockToken, clientName, startOffset, len,
      verifyChecksum, cachingStrategy);

  // Step 2: read the response from the peer's input stream and validate it.
  final DataInputStream in = new DataInputStream(peer.getInputStream());
  final BlockOpResponseProto status =
      BlockOpResponseProto.parseFrom(PBHelperClient.vintPrefixed(in));
  checkSuccess(status, peer, block, file);

  final ReadOpChecksumInfoProto checksumInfo = status.getReadOpChecksumInfo();
  final DataChecksum checksum =
      DataTransferProtoUtil.fromProto(checksumInfo.getChecksum());
  // Warning when we get CHECKSUM_NULL?

  // Step 3: the first chunk must start at or before startOffset and less
  // than one checksum chunk before it.
  final long firstChunkOffset = checksumInfo.getChunkOffset();
  final boolean chunkOffsetValid = firstChunkOffset >= 0
      && firstChunkOffset <= startOffset
      && firstChunkOffset > (startOffset - checksum.getBytesPerChecksum());
  if (!chunkOffsetValid) {
    throw new IOException("BlockReader: error in first chunk offset (" +
        firstChunkOffset + ") startOffset is " +
        startOffset + " for file " + file);
  }

  return new BlockReaderRemote(file, block.getBlockId(), checksum,
      verifyChecksum, startOffset, firstChunkOffset, len, peer, datanodeID,
      peerCache, networkDistance);
}
Sender.readBlock方法:
/**
 * Builds an {@code OpReadBlockProto} request from the arguments and sends it
 * with operation code {@code Op.READ_BLOCK}.
 *
 * @param blk block to read
 * @param blockToken access token placed in the request header
 * @param clientName client name placed in the request header
 * @param blockOffset offset inside the block at which to start
 * @param length number of bytes requested
 * @param sendChecksum value of the request's sendChecksums field
 * @param cachingStrategy caching strategy converted and attached to the
 *     request
 * @throws IOException if sending the request fails
 */
public void readBlock(final ExtendedBlock blk,
    final Token<BlockTokenIdentifier> blockToken,
    final String clientName,
    final long blockOffset,
    final long length,
    final boolean sendChecksum,
    final CachingStrategy cachingStrategy) throws IOException {
  // Populate the request field by field, in the same order as before.
  final OpReadBlockProto.Builder request = OpReadBlockProto.newBuilder();
  request.setHeader(
      DataTransferProtoUtil.buildClientHeader(blk, clientName, blockToken));
  request.setOffset(blockOffset);
  request.setLen(length);
  request.setSendChecksums(sendChecksum);
  request.setCachingStrategy(getCachingStrategy(cachingStrategy));

  final OpReadBlockProto proto = request.build();
  send(out, Op.READ_BLOCK, proto);
}
OpReadBlockProto :
// Request message built by Sender.readBlock and sent with Op.READ_BLOCK.
message OpReadBlockProto {
  // Client operation header (built from the block, client name and block token).
  required ClientOperationHeaderProto header = 1;
  // Offset inside the block at which the read starts.
  required uint64 offset = 2;
  // Number of bytes requested. (Field rule fixed: was misspelled "equired".)
  required uint64 len = 3;
  // Whether checksums should be sent with the data; defaults to true.
  optional bool sendChecksums = 4 [default = true];
  // Optional caching strategy for this read.
  optional CachingStrategyProto cachingStrategy = 5;
}
BlockReaderRemote.readNextPacket方法:
/**
 * Receives the next packet, validates its header, optionally verifies its
 * checksums, and finishes the read once all requested bytes have arrived.
 *
 * @throws IOException if the packet header fails its sanity check, or if
 *     receiving, verifying or acknowledging fails
 */
private void readNextPacket() throws IOException {
  // Receive the packet, then pick up its header and data slice.
  packetReceiver.receiveNextPacket(in);
  final PacketHeader header = packetReceiver.getHeader();
  curDataSlice = packetReceiver.getDataSlice();
  assert curDataSlice.capacity() == header.getDataLen();

  LOG.trace("DFSClient readNextPacket got header {}", header);

  // Sanity check the header against the last sequence number seen.
  if (!header.sanityCheck(lastSeqNo)) {
    throw new IOException("BlockReader: error in packet header " +
        header);
  }

  final int dataLen = header.getDataLen();
  if (dataLen > 0) {
    // One checksum per chunk, rounding the chunk count up.
    final int chunks = 1 + (dataLen - 1) / bytesPerChecksum;
    final int checksumsLen = chunks * checksumSize;

    assert packetReceiver.getChecksumSlice().capacity() == checksumsLen :
        "checksum slice capacity=" +
            packetReceiver.getChecksumSlice().capacity() +
            " checksumsLen=" + checksumsLen;

    lastSeqNo = header.getSeqno();
    if (verifyChecksum && curDataSlice.remaining() > 0) {
      // N.B.: the offset reported on a checksum error is relative to the
      // start of the block, not the start of the file. Slightly misleading,
      // but it preserves the behavior of the older BlockReader.
      checksum.verifyChunkedSums(curDataSlice,
          packetReceiver.getChecksumSlice(),
          filename, header.getOffsetInBlock());
    }
    bytesNeededToFinish -= dataLen;
  }

  // The first packet includes some data before the first byte the user
  // requested; skip over it.
  final long packetStart = header.getOffsetInBlock();
  if (packetStart < startOffset) {
    curDataSlice.position((int) (startOffset - packetStart));
  }

  // Once the whole client read is satisfied, read one last packet header
  // (which should be empty) and report the read result.
  if (bytesNeededToFinish <= 0) {
    readTrailingEmptyPacket();
    sendReadResult(verifyChecksum ? Status.CHECKSUM_OK : Status.SUCCESS);
  }
}