客户端获取文件的checksum
整体流程如下:
1、获取文件的所有block
2、遍历每个block
3、获取block的所有副本以及所在的datanode,去datanode上获取这个block的checksum
4、获取每个block的checksum的byte-per-checksum,crc-per-block(block的crc)以及MD5
5、根据所有block的MD5计算文件的MD5
6、文件的MD5 + bytesPerCRC + crcPerBlock 计算MD5FileChecksum
public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length)
throws IOException {
checkOpen();
Preconditions.checkArgument(length >= 0);
//get block locations for the file range
LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0,
length, dfsClientConf.preferedPool);
if (null == blockLocations) {
throw new FileNotFoundException("File does not exist: " + src);
}
List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
final DataOutputBuffer md5out = new DataOutputBuffer();
int bytesPerCRC = -1;
DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
long crcPerBlock = 0;
boolean refetchBlocks = false;
int lastRetriedIndex = -1;
// get block checksum for each block
long remaining = length;
if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
remaining = Math.min(length, blockLocations.getFileLength());
}
for(int i = 0; i < locatedblocks.size() && remaining > 0; i++) {
if (refetchBlocks) { // refetch to get fresh tokens
blockLocations = callGetBlockLocations(namenode, src, 0, length, dfsClientConf.preferedPool);
if (null == blockLocations) {
throw new FileNotFoundException("File does not exist: " + src);
}
locatedblocks = blockLocations.getLocatedBlocks();
refetchBlocks = false;
}
LocatedBlock lb = locatedblocks.get(i);
final ExtendedBlock block = lb.getBlock();
if (remaining < block.getNumBytes()) {
block.setNumBytes(remaining);
}
remaining -= block.getNumBytes();
final DatanodeInfo[] datanodes = lb.getLocations();
//try each datanode location of the block
final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout;
boolean done = false;
for(int j = 0; !done && j < datanodes.length; j++) {
DataOutputStream out = null;
DataInputStream in = null;
try {
//connect to a datanode
IOStreamPair pair = connectToDN(datanodes[j], timeout, lb);
out = new DataOutputStream(new BufferedOutputStream(pair.out,
HdfsConstants.SMALL_BUFFER_SIZE));
in = new DataInputStream(pair.in);
if (LOG.isDebugEnabled()) {
LOG.debug("write to " + datanodes[j] + ": "
+ Op.BLOCK_CHECKSUM + ", block=" + block);
}
// get block MD5
new Sender(out).blockChecksum(block, lb.getBlockToken());
final BlockOpResponseProto reply =
BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));
if (reply.getStatus() != Status.SUCCESS) {
if (reply.getStatus() == Status.ERROR_ACCESS_TOKEN) {
throw new InvalidBlockTokenException();
} else {
throw new IOException("Bad response " + reply + " for block "
+ block + " from datanode " + datanodes[j]);
}
}
OpBlockChecksumResponseProto checksumData =
reply.getChecksumResponse();
//read byte-per-checksum
final int bpc = checksumData.getBytesPerCrc();
if (i == 0) { //first block