0. File upload code
FileSystem fs = FileSystem.get(new URI("hdfs://hadoopmaster:9000"), new Configuration(), "root");
// Option 1: copy via explicit streams
OutputStream os = fs.create(new Path("/test.log"));
FileInputStream fis = new FileInputStream("I:/test.log");
IOUtils.copyBytes(fis, os, 2048, true); // true: close both streams when the copy finishes
// Option 2: equivalent one-call upload
fs.copyFromLocalFile(new Path("I:/test.log"), new Path("/test.log"));
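A self-contained version of the same upload, with the imports and stream cleanup spelled out; this is a sketch assuming a hadoop-client dependency on the classpath and a NameNode reachable at hadoopmaster:9000:

import java.io.FileInputStream;
import java.io.OutputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class HdfsUpload {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(
                new URI("hdfs://hadoopmaster:9000"), new Configuration(), "root");
        try (OutputStream os = fs.create(new Path("/test.log"));
             FileInputStream fis = new FileInputStream("I:/test.log")) {
            IOUtils.copyBytes(fis, os, 2048, false); // false: try-with-resources closes the streams
        } finally {
            fs.close();
        }
    }
}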
1. Creating the FileSystem
1. The concrete FileSystem is instantiated according to the URI scheme; for hdfs it is DistributedFileSystem
FileSystem fs = FileSystem.get(new URI("hdfs://hadoopmaster:9000"), new Configuration(), "root");
->return get(uri, conf);
->return createFileSystem(uri, conf);
->Class<? extends FileSystem> clazz = getFileSystemClass(uri.getScheme(), conf);
->String property = "fs." + scheme + ".impl";
->clazz = (Class<? extends FileSystem>) conf.getClass(property, null); -- assume this resolves to DistributedFileSystem
->FileSystem fs = ReflectionUtils.newInstance(clazz, conf);
->fs.initialize(uri, conf);
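For reference, this scheme-to-class lookup can be reproduced directly. A minimal sketch, assuming the hadoop-client jars are on the classpath; note that getFileSystemClass() also falls back to ServiceLoader discovery when the fs.<scheme>.impl property is unset:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.util.ReflectionUtils;

public class SchemeLookupDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        URI uri = URI.create("hdfs://hadoopmaster:9000");
        // resolves "fs.hdfs.impl" (or a ServiceLoader entry) to DistributedFileSystem
        Class<? extends FileSystem> clazz =
                FileSystem.getFileSystemClass(uri.getScheme(), conf);
        FileSystem fs = ReflectionUtils.newInstance(clazz, conf);
        fs.initialize(uri, conf);  // DistributedFileSystem builds its DFSClient here
        System.out.println(fs.getClass().getName());
        fs.close();
    }
}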
2. Creating the file output stream
1. Call dfsClient.namenode.create(...) to have the NameNode create the file entry
2. Create a DFSOutputStream to manage writes into the stream
3. The DFSClient manages all of the network requests
4. Create a DataStreamer to upload the data stream
OutputStream os = fs.create(new Path("/test.log"));
->FSDataOutputStream out = fs.create(file); -- assuming fs is a DistributedFileSystem 【FileSystem】
->return new FileSystemLinkResolver<HdfsDataOutputStream>() {}.resolve(this, absF);
->for (boolean isLink = true; isLink;)
->in = doCall(p);
->final DFSOutputStream out = dfs.create(getPathName(f),...);
->final DFSOutputStream result = DFSOutputStream.newStreamForCreate(this,src, masked, flag, createParent, replication, blockSize, progress,dfsClientConf.createChecksum(checksumOpt),getFavoredNodesStr(favoredNodes), ecPolicyName, storagePolicy); --【DFSOutputStream】
->stat = dfsClient.namenode.create(src, masked, dfsClient.clientName,new EnumSetWritable<>(flag), createParent, replication,blockSize, SUPPORTED_CRYPTO_VERSIONS, ecPolicyName,storagePolicy);
->out = new DFSOutputStream(dfsClient, src, stat, flag, progress, checksum, favoredNodes, true); --【DFSOutputStream】
->streamer = new DataStreamer(stat, null, dfsClient, src, progress,checksum, cachingStrategy, byteArrayManager, favoredNodes,addBlockFlags);
->return safelyCreateWrappedOutputStream(out);
->return dfs.createWrappedOutputStream(dfsos, statistics);
->return createWrappedOutputStream(dfsos, statistics, 0);
->final FileEncryptionInfo feInfo = dfsos.getFileEncryptionInfo();
->return new HdfsDataOutputStream(dfsos, statistics, startPos);
->return in;
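The parameters threaded through newStreamForCreate() (replication, block size, progress callback) come from the create() overloads. A hedged usage sketch with example values, reusing the fs from section 0:

// Overload exposing the knobs that flow into DFSOutputStream.newStreamForCreate();
// the numbers here are example values, not defaults.
FSDataOutputStream out = fs.create(
        new Path("/test.log"),
        true,                 // overwrite if the file exists
        4096,                 // client-side io buffer size
        (short) 3,            // replication factor recorded by the NameNode
        128 * 1024 * 1024L,   // block size: a new block (and pipeline) every 128 MB
        () -> { });           // Progressable, invoked as data is flushed to the pipeline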
3. Writing the file data
1. DFSOutputStream packs the byte stream into DFSPacket objects
FileInputStream fis = new FileInputStream("I:/test.log");
IOUtils.copyBytes(fis, os, 2048, true);
->copyBytes(in, out, buffSize);
->while (bytesRead >= 0)
->out.write(buf, 0, bytesRead);
->out.write(b); -- assuming out is a DFSOutputStream 【FSDataOutputStream】
->for (int n=0;n<len;n+=write1(b, off+n, len-n)) {}
->System.arraycopy(b, off, buf, count, bytesToCopy);
->flushBuffer();
->writeChecksumChunks(buf, 0, lenToFlush);
->for (int i = 0; i < len; i += sum.getBytesPerChecksum())
->writeChunk(b, off + i, chunkLen, checksum, ckOffset, getChecksumSize());
->currentPacket.writeData(b, offset, len); --【DFSOutputStream】
//once the current packet (or block) is full, the packet is enqueued for sending
->enqueueCurrentPacketFull(); --【DFSOutputStream】
->enqueueCurrentPacket();
//if the send queue is full, the writing thread blocks and waits
->getStreamer().waitAndQueuePacket(currentPacket);
->queuePacket(packet);
->dataQueue.addLast(packet);
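These last three calls implement a classic bounded producer/consumer handoff: the writer thread blocks while dataQueue is full, and the DataStreamer thread drains it. A simplified sketch of that mechanism (not the Hadoop source; the class and its limit parameter are hypothetical, cf. dfs.client.write.max-packets-in-flight):

import java.util.LinkedList;

class PacketQueue<P> {
    private final LinkedList<P> dataQueue = new LinkedList<>();
    private final int limit;  // max packets allowed in flight

    PacketQueue(int limit) { this.limit = limit; }

    // writer side, cf. waitAndQueuePacket(): block while full, then enqueue
    synchronized void waitAndQueue(P packet) throws InterruptedException {
        while (dataQueue.size() >= limit) {
            wait();                  // writer parks until the streamer drains packets
        }
        dataQueue.addLast(packet);   // cf. queuePacket()
        notifyAll();                 // wake the streamer waiting for data
    }

    // streamer side: block while empty, then take the head packet
    synchronized P take() throws InterruptedException {
        while (dataQueue.isEmpty()) {
            wait();
        }
        P packet = dataQueue.removeFirst();
        notifyAll();                 // wake a writer blocked on a full queue
        return packet;
    }
}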
2. DataStreamer calls dfsClient.namenode.addBlock(...) to ask the NameNode to allocate a block and its target DataNodes
3. DataStreamer asynchronously streams the packets to the DataNodes
class DataStreamer extends Daemon {
    protected final BlockToWrite block;            // the block currently being written
    private DataOutputStream blockStream;          // socket stream to the first DataNode in the pipeline
    private DataInputStream blockReplyStream;
    private volatile DatanodeInfo[] nodes = null;  // the DataNode pipeline
    protected final DFSClient dfsClient;
    protected final HdfsFileStatus stat;
    protected final LinkedList<DFSPacket> dataQueue = new LinkedList<>();  // packets waiting to be sent
    private final LinkedList<DFSPacket> ackQueue = new LinkedList<>();     // packets sent but not yet acked
    private final List<DatanodeInfo> congestedNodes = new ArrayList<>();

    private DataStreamer(HdfsFileStatus stat, ExtendedBlock block,
            DFSClient dfsClient, String src, Progressable progress,
            DataChecksum checksum, AtomicReference<CachingStrategy> cachingStrategy,
            ByteArrayManager byteArrayManage, boolean isAppend,
            String[] favoredNodes, EnumSet<AddBlockFlag> flags) {
        this.block = new BlockToWrite(block);
        this.stat = stat;
    }

    @Override
    public void run() {
        while (!streamerClosed && dfsClient.clientRunning) {
            DFSPacket one;
            synchronized (dataQueue) {
                dataQueue.wait(timeout);
                if (dataQueue.isEmpty()) {
                    one = createHeartbeatPacket();
                    ->final byte[] buf = new byte[PacketHeader.PKT_MAX_HEADER_LEN];
                    ->return new DFSPacket(buf, 0, 0, DFSPacket.HEART_BEAT_SEQNO, 0, false);
                } else {
                    one = dataQueue.getFirst(); // regular data packet
                }
            }
            if (stage == BlockConstructionStage.PIPELINE_SETUP_CREATE) {
                setPipeline(nextBlockOutputStream());
                ->protected LocatedBlock nextBlockOutputStream() throws IOException
                ->lb = locateFollowingBlock(excluded.length > 0 ? excluded : null, oldBlock);
                ->return DFSOutputStream.addBlock(excluded, dfsClient, src, oldBlock, stat.getFileId(), favoredNodes, addBlockFlags);
                ->return dfsClient.namenode.addBlock(src, dfsClient.clientName, prevBlock, excludedNodes, fileId, favoredNodes, allocFlags);
                ->private void setPipeline(LocatedBlock lb)
                ->setPipeline(lb.getLocations(), lb.getStorageTypes(), lb.getStorageIDs());
                ->success = createBlockOutputStream(nodes, nextStorageTypes, nextStorageIDs, 0L, false);
                ->s = createSocketForPipeline(nodes[0], nodes.length, dfsClient);
                ->final String dnAddr = first.getXferAddr(conf.isConnectToDnViaHostname());
                ->final InetSocketAddress isa = NetUtils.createSocketAddr(dnAddr);
                ->final Socket sock = client.socketFactory.createSocket();
                ->NetUtils.connect(sock, isa, client.getRandomLocalInterfaceAddr(), conf.getSocketTimeout());
                ->OutputStream unbufOut = NetUtils.getOutputStream(s, writeTimeout);
                ->InputStream unbufIn = NetUtils.getInputStream(s, readTimeout);
                ->out = new DataOutputStream(new BufferedOutputStream(unbufOut, DFSUtilClient.getSmallBufferSize(dfsClient.getConfiguration())));
                ->new Sender(out).writeBlock(blockCopy, nodeStorageTypes[0], accessToken, dfsClient.clientName, nodes, nodeStorageTypes, null, bcs, nodes.length, block.getNumBytes(), bytesSent, newGS, checksum4WriteBlock, cachingStrategy.get(), isLazyPersistFile, (targetPinnings != null && targetPinnings[0]), targetPinnings, nodeStorageIDs[0], nodeStorageIDs);
                ->blockStream = out;
                initDataStreaming();
                ->response = new ResponseProcessor(nodes);
                ->response.start();
            } else if (stage == BlockConstructionStage.PIPELINE_SETUP_APPEND) {
                setupPipelineForAppendOrRecovery();
                initDataStreaming();
            }
            one.writeTo(blockStream);
            blockStream.flush();
        }
    }
}
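One detail the excerpt elides: before a regular packet is written to blockStream, run() moves it from dataQueue to ackQueue, where it stays until the ResponseProcessor sees the pipeline's acknowledgement. A simplified sketch of that handoff (condensed, not a verbatim excerpt):

synchronized (dataQueue) {
    // 'one' was taken from the head of dataQueue above
    dataQueue.removeFirst();
    ackQueue.addLast(one);     // held until every DataNode in the pipeline acks it
    dataQueue.notifyAll();     // unblocks writers parked in waitAndQueuePacket()
}
one.writeTo(blockStream);      // packet header + checksums + data onto the socket
blockStream.flush();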