[Hadoop] HDFS File Upload: Source Code Analysis

0. File Upload Code

FileSystem fs = FileSystem.get(new URI("hdfs://hadoopmaster:9000"), new Configuration(), "root"); // for an hdfs:// URI this is a DistributedFileSystem
// Option 1: open an HDFS output stream and copy the local file's bytes into it
OutputStream os = fs.create(new Path("/test.log"));
FileInputStream fis = new FileInputStream("I://test.log");
IOUtils.copyBytes(fis, os, 2048, true); // 2048-byte buffer; true closes both streams when done
// Option 2: upload the local file in a single call
fs.copyFromLocalFile(new Path("I://test.log"), new Path("/test.log"));

1. Creating the FileSystem

1. The concrete FileSystem is created according to the URI scheme; for hdfs it is DistributedFileSystem (a minimal sketch of this lookup follows the trace below)

FileSystem fs = FileSystem.get(new URI("hdfs://hadoopmaster:9000"), new Configuration(), "root");
->return get(uri, conf);
  ->return createFileSystem(uri, conf);
    ->Class<? extends FileSystem> clazz = getFileSystemClass(uri.getScheme(), conf);
      ->String property = "fs." + scheme + ".impl";
      ->clazz = (Class<? extends FileSystem>) conf.getClass(property, null);  -- assume DistributedFileSystem
    ->FileSystem fs = ReflectionUtils.newInstance(clazz, conf);
    ->fs.initialize(uri, conf);
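
As referenced above, the lookup is a plain "fs.<scheme>.impl" property read followed by reflection. Below is a hedged, self-contained sketch of that mechanism; the HashMap stand-in for Configuration and the class names here are illustrative assumptions, not Hadoop's actual classes (the real code uses conf.getClass, ReflectionUtils.newInstance and fs.initialize).

import java.net.URI;
import java.util.HashMap;
import java.util.Map;

// Sketch: resolve a FileSystem implementation class name from the URI scheme.
public class FsResolverSketch {
    // Stand-in for org.apache.hadoop.conf.Configuration: "fs.<scheme>.impl" -> class name
    private static final Map<String, String> conf = new HashMap<>();

    static String getFileSystemClassName(String scheme) {
        String property = "fs." + scheme + ".impl";   // the same key the trace shows
        String className = conf.get(property);
        if (className == null) {
            throw new IllegalArgumentException("No FileSystem for scheme: " + scheme);
        }
        // The real code would now load this class, instantiate it via
        // ReflectionUtils.newInstance(clazz, conf), and call fs.initialize(uri, conf).
        return className;
    }

    public static void main(String[] args) {
        conf.put("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        URI uri = URI.create("hdfs://hadoopmaster:9000");
        System.out.println(getFileSystemClassName(uri.getScheme())); // DistributedFileSystem
    }
}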

2. Creating the File Output Stream

1. Call dfsClient.namenode.create(…) to ask the NameNode to create the file
2. Create a DFSOutputStream to manage writes to the stream
3. Create a DFSClient to manage the network requests as a whole
4. Create a DataStreamer responsible for uploading the file stream

OutputStream os = fs.create(new Path("/test.log"));
->FSDataOutputStream out = fs.create(file);  -- assume fs is DistributedFileSystem [FileSystem]
  ->return new FileSystemLinkResolver<HdfsDataOutputStream>() {}.resolve(this, absF);
    ->for (boolean isLink = true; isLink;)
      ->in = doCall(p);
        ->final DFSOutputStream out = dfs.create(getPathName(f),...);
          ->final DFSOutputStream result = DFSOutputStream.newStreamForCreate(this, src, masked, flag, createParent, replication, blockSize, progress, dfsClientConf.createChecksum(checksumOpt), getFavoredNodesStr(favoredNodes), ecPolicyName, storagePolicy);  -- [DFSOutputStream]
            ->stat = dfsClient.namenode.create(src, masked, dfsClient.clientName, new EnumSetWritable<>(flag), createParent, replication, blockSize, SUPPORTED_CRYPTO_VERSIONS, ecPolicyName, storagePolicy);
            ->out = new DFSOutputStream(dfsClient, src, stat, flag, progress, checksum, favoredNodes, true);  -- [DFSOutputStream]
              ->streamer = new DataStreamer(stat, null, dfsClient, src, progress, checksum, cachingStrategy, byteArrayManager, favoredNodes, addBlockFlags);
        ->return safelyCreateWrappedOutputStream(out);
          ->return dfs.createWrappedOutputStream(dfsos, statistics);
            ->return createWrappedOutputStream(dfsos, statistics, 0);
              ->final FileEncryptionInfo feInfo = dfsos.getFileEncryptionInfo();
              ->return new HdfsDataOutputStream(dfsos, statistics, startPos);
    ->return in;
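
Note that the replication and blockSize arguments passed to newStreamForCreate above can be set per file at the call site. A minimal hedged example using one of FileSystem.create's standard overloads (the values are illustrative, not defaults to copy):

// Illustrative values: overwrite, 4 KB client buffer, 3 replicas, 128 MB blocks
FSDataOutputStream out = fs.create(
    new Path("/test.log"),
    true,                  // overwrite if the file already exists
    4096,                  // client-side io buffer size
    (short) 3,             // replication factor for this file
    128 * 1024 * 1024L);   // block size for this file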

3. Writing the File

1. DFSOutputStream packs the written bytes into DFSPacket objects

FileInputStream fis = new FileInputStream("I://test.log");
IOUtils.copyBytes(fis, os, 2048, true);
->copyBytes(in, out, buffSize);
  ->while (bytesRead >= 0)
    ->out.write(buf, 0, bytesRead);
      ->out.write(b);  -- assume out is DFSOutputStream [FSDataOutputStream]
        ->for (int n = 0; n < len; n += write1(b, off + n, len - n)) {}
          ->System.arraycopy(b, off, buf, count, bytesToCopy);
          ->flushBuffer();
            ->writeChecksumChunks(buf, 0, lenToFlush);
              ->for (int i = 0; i < len; i += sum.getBytesPerChecksum())
                ->writeChunk(b, off + i, chunkLen, checksum, ckOffset, getChecksumSize());
                  ->currentPacket.writeData(b, offset, len);  -- [DFSOutputStream]
                  // if the block's bytes are written full, the packet is put on the send queue
                  ->enqueueCurrentPacketFull();  -- [DFSOutputStream]
                    ->enqueueCurrentPacket();
                      // if the send queue is full, the writing thread waits
                      ->getStreamer().waitAndQueuePacket(currentPacket);
                        ->queuePacket(packet);
                          ->dataQueue.addLast(packet);
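
To make the chunk/packet split concrete, here is a hedged arithmetic sketch using common defaults (512-byte checksum chunks, 4-byte CRC32C, 64 KB packets, 128 MB blocks; all of these are configurable, and packet header overhead is ignored for simplicity):

// Back-of-envelope chunk/packet math with assumed defaults; real values come from
// dfs.bytes-per-checksum, dfs.client-write-packet-size and dfs.blocksize.
public class PacketMathSketch {
    public static void main(String[] args) {
        int bytesPerChecksum = 512;           // one CRC per 512-byte chunk
        int checksumSize = 4;                 // CRC32C checksum is 4 bytes
        int packetSize = 64 * 1024;           // client write packet size
        long blockSize = 128L * 1024 * 1024;  // block size

        int chunkSize = bytesPerChecksum + checksumSize;      // data + checksum per chunk
        int chunksPerPacket = packetSize / chunkSize;         // header overhead ignored
        long dataPerPacket = (long) chunksPerPacket * bytesPerChecksum;
        long packetsPerBlock = blockSize / dataPerPacket;

        System.out.println("chunks per packet : " + chunksPerPacket);  // 127
        System.out.println("packets per block : " + packetsPerBlock);  // 2064
    }
}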

2. DataStreamer calls dfsClient.namenode.addBlock(…) to ask the NameNode to allocate a block and its target DataNodes
3. DataStreamer asynchronously sends the queued packets to the DataNodes (a simplified queue sketch follows the class below)

class DataStreamer extends Daemon {
   protected final BlockToWrite block;            // the block currently being written
   private DataOutputStream blockStream;          // socket stream to the first DataNode in the pipeline
   private DataInputStream blockReplyStream;      // ack stream coming back from the pipeline
   private volatile DatanodeInfo[] nodes = null;  // DataNodes forming the pipeline

   protected final DFSClient dfsClient;
   protected final HdfsFileStatus stat;
   protected final LinkedList<DFSPacket> dataQueue = new LinkedList<>();  // packets waiting to be sent
   private final LinkedList<DFSPacket> ackQueue = new LinkedList<>();     // packets sent, awaiting acks
   private final List<DatanodeInfo> congestedNodes = new ArrayList<>();
   
   private DataStreamer(HdfsFileStatus stat, ExtendedBlock block, DFSClient dfsClient, String src,
           Progressable progress, DataChecksum checksum, AtomicReference<CachingStrategy> cachingStrategy,
           ByteArrayManager byteArrayManage, boolean isAppend, String[] favoredNodes,
           EnumSet<AddBlockFlag> flags) {
       this.block = new BlockToWrite(block);
       this.stat = stat;
   }
   
   @Override
   public void run() {
       while (!streamerClosed && dfsClient.clientRunning) {
           DFSPacket one;
           synchronized (dataQueue) {
               dataQueue.wait(timeout);
               if (dataQueue.isEmpty()) {
                   one = createHeartbeatPacket();
                   ->final byte[] buf = new byte[PacketHeader.PKT_MAX_HEADER_LEN];
                   ->return new DFSPacket(buf, 0, 0, DFSPacket.HEART_BEAT_SEQNO, 0, false);
               }else{
                   one = dataQueue.getFirst(); // regular data packet
               }
           }
           
            if (stage == BlockConstructionStage.PIPELINE_SETUP_CREATE) {
                setPipeline(nextBlockOutputStream());
                ->protected LocatedBlock nextBlockOutputStream() throws IOException
                  // ask the NameNode for a new block and the DataNodes to hold it
                  ->lb = locateFollowingBlock(excluded.length > 0 ? excluded : null, oldBlock);
                    ->return DFSOutputStream.addBlock(excluded, dfsClient, src, oldBlock, stat.getFileId(), favoredNodes, addBlockFlags);
                      ->return dfsClient.namenode.addBlock(src, dfsClient.clientName, prevBlock, excludedNodes, fileId, favoredNodes, allocFlags);
                  // connect to the first DataNode of the new pipeline
                  ->success = createBlockOutputStream(nodes, nextStorageTypes, nextStorageIDs, 0L, false);
                    ->s = createSocketForPipeline(nodes[0], nodes.length, dfsClient);
                      ->final String dnAddr = first.getXferAddr(conf.isConnectToDnViaHostname());
                      ->final InetSocketAddress isa = NetUtils.createSocketAddr(dnAddr);
                      ->final Socket sock = client.socketFactory.createSocket();
                      ->NetUtils.connect(sock, isa, client.getRandomLocalInterfaceAddr(), conf.getSocketTimeout());
                    ->OutputStream unbufOut = NetUtils.getOutputStream(s, writeTimeout);
                    ->InputStream unbufIn = NetUtils.getInputStream(s, readTimeout);
                    ->out = new DataOutputStream(new BufferedOutputStream(unbufOut, DFSUtilClient.getSmallBufferSize(dfsClient.getConfiguration())));
                    // send the WRITE_BLOCK operation down the pipeline
                    ->new Sender(out).writeBlock(blockCopy, nodeStorageTypes[0], accessToken, dfsClient.clientName, nodes, nodeStorageTypes, null, bcs, nodes.length, block.getNumBytes(), bytesSent, newGS, checksum4WriteBlock, cachingStrategy.get(), isLazyPersistFile, (targetPinnings != null && targetPinnings[0]), targetPinnings, nodeStorageIDs[0], nodeStorageIDs);
                    ->blockStream = out;
                ->private void setPipeline(LocatedBlock lb)
                  ->setPipeline(lb.getLocations(), lb.getStorageTypes(), lb.getStorageIDs());
                initDataStreaming();
                ->response = new ResponseProcessor(nodes);
                ->response.start();
            } else if (stage == BlockConstructionStage.PIPELINE_SETUP_APPEND) {
                setupPipelineForAppendOrRecovery();
                initDataStreaming();
            }
           
            // move the packet from dataQueue to ackQueue before sending it
            synchronized (dataQueue) {
                dataQueue.removeFirst();
                ackQueue.addLast(one);
            }
            one.writeTo(blockStream);
            blockStream.flush();
       }
   }
}
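
Putting it together: the writer thread fills dataQueue, the DataStreamer drains it and parks each sent packet on ackQueue, and a ResponseProcessor thread removes packets once the pipeline acknowledges them. Below is a hedged, self-contained sketch of that handoff; the class, method names and maxPackets parameter are simplifications for illustration, not the actual Hadoop source.

import java.util.LinkedList;

// Simplified dataQueue/ackQueue handoff (sketch only; seqnos stand in for DFSPackets).
class PacketQueuesSketch {
    private final LinkedList<Integer> dataQueue = new LinkedList<>(); // waiting to be sent
    private final LinkedList<Integer> ackQueue = new LinkedList<>();  // sent, awaiting acks

    // Writer side (cf. waitAndQueuePacket): block while both queues are full.
    synchronized void waitAndQueuePacket(int seqno, int maxPackets) throws InterruptedException {
        while (dataQueue.size() + ackQueue.size() >= maxPackets) {
            wait();
        }
        dataQueue.addLast(seqno);
        notifyAll();
    }

    // Streamer side: take the oldest packet and move it to ackQueue before sending.
    synchronized int takeForSend() throws InterruptedException {
        while (dataQueue.isEmpty()) {
            wait();
        }
        int one = dataQueue.removeFirst();
        ackQueue.addLast(one);
        return one;
    }

    // ResponseProcessor side: a successful ack retires the packet and frees queue space.
    synchronized void ackReceived(int seqno) {
        if (!ackQueue.isEmpty() && ackQueue.getFirst() == seqno) {
            ackQueue.removeFirst();
            notifyAll();
        }
    }

    public static void main(String[] args) throws InterruptedException {
        PacketQueuesSketch q = new PacketQueuesSketch();
        q.waitAndQueuePacket(0, 80); // writer enqueues packet seqno 0
        int one = q.takeForSend();   // streamer moves it to ackQueue and sends it
        q.ackReceived(one);          // ResponseProcessor clears it on a good ack
        System.out.println("packet " + one + " acked");
    }
}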