HDFS写文件create客户端代码追踪

1 创建输出流过程

客户端通过给FileSystem对象的create方法传入Path对象来构建输出流,在FileSystem对象内部调用多个create方法来传入相应的配置。

  /**
   * Creates an FSDataOutputStream at the indicated Path with write-progress
   * reporting. The permission handed to the next overload is the default file
   * permission with the umask from the configuration applied.
   *
   * @param f the file name to open
   * @param overwrite if a file with this name already exists, then if true,
   *   the file will be overwritten, and if false an error will be thrown.
   * @param bufferSize the size of the buffer to be used.
   * @param replication required block replication for the file.
   * @param blockSize block size, forwarded unchanged to the next overload.
   * @param progress progress callback, forwarded unchanged to the next
   *   overload.
   * @return the stream produced by the permission-aware create overload
   * @throws IOException IO failure
   */
  public FSDataOutputStream create(Path f,
      boolean overwrite,
      int bufferSize,
      short replication,
      long blockSize,
      Progressable progress) throws IOException {
    // The settings passed down include io.file.buffer.size, the default
    // permission, the replica count and the block size.
    final FsPermission fileDefault = FsPermission.getFileDefault();
    final FsPermission umask = FsPermission.getUMask(getConf());
    return this.create(
        f,
        FsCreateModes.applyUMask(fileDefault, umask),
        overwrite,
        bufferSize,
        replication,
        blockSize,
        progress);
  }

然后,调用进入DistributedFileSystem的create方法。该方法借助FileSystemLinkResolver构建建流时的重试机制,这一点与读流程中的open方法相似。

  /**
   * Creates a file through the DFS client, routed via a
   * FileSystemLinkResolver: {@code doCall} performs the create against this
   * file system's DFSClient, while {@code next} re-issues the same create on
   * the FileSystem handed to it by the resolver.
   *
   * @param f path of the file to create; made absolute via fixRelativePart
   * @param permission permission forwarded to the DFS client
   * @param cflags create flags forwarded to the DFS client
   * @param bufferSize buffer size forwarded to the DFS client
   * @param replication replication factor forwarded to the DFS client
   * @param blockSize block size forwarded to the DFS client
   * @param progress progress callback forwarded to the DFS client
   * @param checksumOpt checksum options forwarded to the DFS client
   * @return the wrapped output stream for the new file
   * @throws IOException if the create fails
   */
  public FSDataOutputStream create(final Path f, final FsPermission permission,
      final EnumSet<CreateFlag> cflags, final int bufferSize,
      final short replication, final long blockSize,
      final Progressable progress, final ChecksumOpt checksumOpt)
      throws IOException {
    // Record the operation in both statistics holders before doing any work.
    statistics.incrementWriteOps(1);
    storageStatistics.incrementOpCounter(OpType.CREATE);

    final Path absolutePath = fixRelativePart(f);
    final FileSystemLinkResolver<FSDataOutputStream> resolver =
        new FileSystemLinkResolver<FSDataOutputStream>() {
          @Override
          public FSDataOutputStream doCall(final Path p) throws IOException {
            // Turn the Path into the path name string (the original note says
            // this also checks the path format), then call DFSClient.
            final DFSOutputStream rawStream = dfs.create(getPathName(p),
                permission, cflags, replication, blockSize, progress,
                bufferSize, checksumOpt);
            return dfs.createWrappedOutputStream(rawStream, statistics);
          }

          @Override
          public FSDataOutputStream next(final FileSystem fs, final Path p)
              throws IOException {
            // Same request, issued against the file system supplied here.
            return fs.create(p, permission, cflags, bufferSize,
                replication, blockSize, progress, checksumOpt);
          }
        };
    return resolver.resolve(this, absolutePath);
  }

通过DFSClient中的create方法创建输出流

  /**
   * Creates the output stream for a new file and registers it for lease
   * handling.
   *
   * <p>Steps, in order: verify the client is open, apply the umask to the
   * requested permission, build the stream via
   * {@code DFSOutputStream.newStreamForCreate}, then call
   * {@code beginFileLease} with the stream's file id.
   *
   * @param src path name of the file to create
   * @param permission requested permission, before the umask is applied
   * @param flag create flags forwarded to the stream factory
   * @param createParent forwarded to the stream factory
   * @param replication forwarded to the stream factory
   * @param blockSize forwarded to the stream factory
   * @param progress forwarded to the stream factory
   * @param buffersize not referenced in this method body
   * @param checksumOpt options used to build the checksum for the stream
   * @param favoredNodes converted with getFavoredNodesStr before forwarding
   * @param ecPolicyName forwarded to the stream factory
   * @return the newly created output stream
   * @throws IOException if any step fails
   */
  public DFSOutputStream create(String src, FsPermission permission,
      EnumSet<CreateFlag> flag, boolean createParent, short replication,
      long blockSize, Progressable progress, int buffersize,
      ChecksumOpt checksumOpt, InetSocketAddress[] favoredNodes,
      String ecPolicyName) throws IOException {
    checkOpen();

    // Decide the permission of the file about to be written.
    final FsPermission effectivePermission = applyUMask(permission);
    LOG.debug("{}: masked={}", src, effectivePermission);

    // Build the stream; createChecksum(checksumOpt) selects the checksum
    // scheme for it.
    final DFSOutputStream stream = DFSOutputStream.newStreamForCreate(
        this,
        src,
        effectivePermission,
        flag,
        createParent,
        replication,
        blockSize,
        progress,
        dfsClientConf.createChecksum(checksumOpt),
        getFavoredNodesStr(favoredNodes),
        ecPolicyName);

    // With the stream in hand, start the lease for this file. Per the
    // original note: an expired lease stops its holder from writing, and the
    // lease also keeps several clients from writing the same file.
    beginFileLease(stream.getFileId(), stream);
    return stream;
  }

2 向NameNode发起请求

DFSOutputStream中与NameNode沟通创建元信息

  /**
   * Asks the NameNode to create the file and builds the matching output
   * stream from the returned HdfsFileStatus.
   *
   * <p>NOTE: this listing is an abridged excerpt; the {@code ...} markers
   * stand for code omitted from the original method.
   *
   * @return a started DFSStripedOutputStream when the returned status carries
   *   an erasure coding policy, otherwise a started DFSOutputStream
   * @throws IOException declared by the method; the throwing paths are in the
   *   omitted parts
   */
  static DFSOutputStream newStreamForCreate(DFSClient dfsClient, String src,
      FsPermission masked, EnumSet<CreateFlag> flag, boolean createParent,
      short replication, long blockSize, Progressable progress,
      DataChecksum checksum, String[] favoredNodes, String ecPolicyName)
      throws IOException {
	  ...
      HdfsFileStatus stat = null;
	  ...
	  // Per the original note the call may be retried up to 10 times; the
	  // retry bookkeeping is in the omitted code.
      while (shouldRetry) {
        shouldRetry = false;
        try {
		  // RPC to the NameNode: create the file and obtain its HdfsFileStatus.
          stat = dfsClient.namenode.create(src, masked, dfsClient.clientName,
              new EnumSetWritable<>(flag), createParent, replication,
              blockSize, SUPPORTED_CRYPTO_VERSIONS, ecPolicyName);
          break;
        } ...
      Preconditions.checkNotNull(stat, "HdfsFileStatus should not be null!");
      final DFSOutputStream out;
	  // Split between erasure-coded (striped) writes and replicated writes.
      if(stat.getErasureCodingPolicy() != null) {
        out = new DFSStripedOutputStream(dfsClient, src, stat,
            flag, progress, checksum, favoredNodes);
      } else {
	  
        out = new DFSOutputStream(dfsClient, src, stat,
            flag, progress, checksum, favoredNodes, true);
      }
      out.start();
      return out;
    }
  }

通过ClientProtocol协议向namenode server发起请求

  /**
   * Sends the create request to the NameNode over the RPC proxy.
   *
   * <p>Builds a CreateRequestProto from the arguments, adding the EC policy
   * name and the unmasked permission only when they are non-null, then
   * converts the response back to an HdfsFileStatus.
   *
   * @param src path name of the file to create
   * @param masked permission to send; its unmasked form is sent too when
   *   present
   * @param clientName name of the requesting client
   * @param flag create flags
   * @param createParent copied into the request as-is
   * @param replication copied into the request as-is
   * @param blockSize copied into the request as-is
   * @param supportedVersions crypto protocol versions added to the request
   * @param ecPolicyName erasure coding policy name; skipped when null
   * @return the converted file status, or null when the response has none
   * @throws IOException the remote exception extracted from a
   *   ServiceException
   */
  public HdfsFileStatus create(String src, FsPermission masked,
      String clientName, EnumSetWritable<CreateFlag> flag,
      boolean createParent, short replication, long blockSize,
      CryptoProtocolVersion[] supportedVersions, String ecPolicyName)
      throws IOException {
    // Mandatory fields first.
    CreateRequestProto.Builder request = CreateRequestProto.newBuilder()
        .setSrc(src)
        .setMasked(PBHelperClient.convert(masked))
        .setClientName(clientName)
        .setCreateFlag(PBHelperClient.convertCreateFlag(flag))
        .setCreateParent(createParent)
        .setReplication(replication)
        .setBlockSize(blockSize);

    // Optional fields are only set when a value is available.
    if (ecPolicyName != null) {
      request.setEcPolicyName(ecPolicyName);
    }
    FsPermission rawPermission = masked.getUnmasked();
    if (rawPermission != null) {
      request.setUnmasked(PBHelperClient.convert(rawPermission));
    }
    request.addAllCryptoProtocolVersion(
        PBHelperClient.convert(supportedVersions));

    CreateRequestProto message = request.build();
    try {
      CreateResponseProto response = rpcProxy.create(null, message);
      if (!response.hasFs()) {
        return null;
      }
      return PBHelperClient.convert(response.getFs());
    } catch (ServiceException e) {
      throw ProtobufHelper.getRemoteException(e);
    }
  }

3 创建DataStreamer

获取stat后,根据应答构建DataStreamer

  protected DFSOutputStream(DFSClient dfsClient, String src,
      HdfsFileStatus stat, EnumSet<CreateFlag> flag, Progressable progress,
      DataChecksum checksum, String[] favoredNodes, boolean createStreamer) {
	//3.1 利用构造函数初始化一些成员变量
    this(dfsClient, src, flag, progress, stat, checksum);
	//flag只有CREATE与OVERWRITE
    this.shouldSyncBlock = flag.contains(CreateFlag.SYNC_BLOCK);
	//计算数据包能包含的chunk数及包的长度,默认包大小为64KB,dfs.client-write-packet-size
    computePacketChunkSize(dfsClient.getConf().getWritePacketSize(),
        bytesPerChecksum);
	
	//3.2 初始化DataStreamer对象
    if (createStreamer) {
      streamer = new DataStreamer(stat, null, dfsClient, src, progress,
          checksum, cachingStrategy, byteArrayManager, favoredNodes,
          addBlockFlags);
    }
  }

3.1 构造DFSOutputStream

  private DFSOutputStream(DFSClient dfsClient, String src,
      EnumSet<CreateFlag> flag,
      Progressable progress, HdfsFileStatus stat, DataChecksum checksum) {
    //获取checksum种类与chunk大小
	super(getChecksum4Compute(checksum, stat));
    this.dfsClient = dfsClient;
    this.src = src;
    this.fileId = stat.getFileId();
    this.blockSize = stat.getBlockSize();
    this.blockReplication = stat.getReplication();
    this.fileEncryptionInfo = stat.getFileEncryptionInfo();
    this.cachingStrategy = new AtomicReference<>(
        dfsClient.getDefaultWriteCachingStrategy());
    this.addBlockFlags = EnumSet.noneOf(AddBlockFlag.class);
    if (flag.contains(CreateFlag.NO_LOCAL_WRITE)) {
      this.addBlockFlags.add(AddBlockFlag.NO_LOCAL_WRITE);
    }
    if (progress != null) {
      DFSClient.LOG.debug("Set non-null progress callback on DFSOutputStream "
          +"{}", src);
    }
	//初始化写入数据包大小
    initWritePacketSize();

    this.bytesPerChecksum = checksum.getBytesPerChecksum();
    if (bytesPerChecksum <= 0) {
      throw new HadoopIllegalArgumentException(
          "Invalid value: bytesPerChecksum = " + bytesPerChecksum + " <= 0");
    }
    if (blockSize % bytesPerChecksum != 0) {
      throw new HadoopIllegalArgumentException("Invalid values: "
          + HdfsClientConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY
          + " (=" + bytesPerChecksum + ") must divide block size (=" +
          blockSize + ").");
    }
    this.byteArrayManager = dfsClient.getClientContext().getByteArrayManager();
  }

3.2 初始化DataStreamer对象

  /**
   * Constructs a streamer for a new (non-append) write: delegates to the
   * private constructor with {@code isAppend = false} and then sets the stage
   * to PIPELINE_SETUP_CREATE.
   *
   * @param stat file status of the file being written
   * @param block block passed through to the private constructor
   * @param dfsClient owning client
   * @param src path name of the file
   * @param progress progress callback
   * @param checksum checksum used when writing blocks
   * @param cachingStrategy shared caching strategy reference
   * @param byteArrayManage byte array manager
   * @param favoredNodes favored nodes
   * @param flags add-block flags
   */
  DataStreamer(HdfsFileStatus stat, ExtendedBlock block, DFSClient dfsClient,
               String src, Progressable progress, DataChecksum checksum,
               AtomicReference<CachingStrategy> cachingStrategy,
               ByteArrayManager byteArrayManage, String[] favoredNodes,
               EnumSet<AddBlockFlag> flags) {
    this(stat, block, dfsClient, src, progress, checksum,
        cachingStrategy, byteArrayManage,
        false, // isAppend
        favoredNodes, flags);
    // Move the streamer into its initial state.
    this.stage = BlockConstructionStage.PIPELINE_SETUP_CREATE;
  }

构建对象

  /**
   * Common constructor: stores the arguments in fields and derives the
   * remaining settings from the client configuration (slow-IO warning
   * threshold, excluded-nodes cache expiry, datanode restart timeout).
   *
   * @param stat file status; also used to determine isLazyPersistFile
   * @param block wrapped in a BlockToWrite
   * @param dfsClient owning client and source of the configuration
   * @param src path name of the file
   * @param progress progress callback
   * @param checksum stored as checksum4WriteBlock
   * @param cachingStrategy shared caching strategy reference
   * @param byteArrayManage byte array manager
   * @param isAppend whether this streamer is for an append
   * @param favoredNodes favored nodes
   * @param flags stored as addBlockFlags
   */
  private DataStreamer(HdfsFileStatus stat, ExtendedBlock block,
                       DFSClient dfsClient, String src,
                       Progressable progress, DataChecksum checksum,
                       AtomicReference<CachingStrategy> cachingStrategy,
                       ByteArrayManager byteArrayManage,
                       boolean isAppend, String[] favoredNodes,
                       EnumSet<AddBlockFlag> flags) {
    // Values taken directly from the arguments.
    this.block = new BlockToWrite(block);
    this.dfsClient = dfsClient;
    this.src = src;
    this.progress = progress;
    this.stat = stat;
    this.checksum4WriteBlock = checksum;
    this.cachingStrategy = cachingStrategy;
    this.byteArrayManager = byteArrayManage;
    this.isLazyPersistFile = isLazyPersist(stat);
    this.isAppend = isAppend;
    this.favoredNodes = favoredNodes;

    // Values derived from the client configuration.
    final DfsClientConf clientConf = dfsClient.getConf();
    this.dfsclientSlowLogThresholdMs = clientConf.getSlowIoWarningThresholdMs();
    this.excludedNodes =
        initExcludedNodes(clientConf.getExcludedNodesCacheExpiry());
    this.errorState = new ErrorState(clientConf.getDatanodeRestartTimeout());

    this.addBlockFlags = flags;
  }

4 租约Lease

当create方法完成流的建立后,为了保证客户端有权限持续写入,会为该客户端开启租约。

  /**
   * Registers a freshly opened stream as being written and hands this client
   * to its lease renewer, both under the filesBeingWritten lock.
   *
   * @param inodeId id of the file the stream writes to
   * @param out the stream being written
   * @throws IOException declared by the signature
   */
  private void beginFileLease(final long inodeId, final DFSOutputStream out)
      throws IOException {
    synchronized (filesBeingWritten) {
      // Track the stream first, then make sure a renewer knows this client.
      this.putFileBeingWritten(inodeId, out);
      this.getLeaseRenewer().put(this);
    }
  }

更新租约中正在写入的文件信息与租约时间

  /**
   * Records {@code out} as the stream writing file {@code inodeId} and, when
   * no lease renewal time has been recorded yet, updates the last renewal
   * time.
   *
   * @param inodeId id of the file being written
   * @param out the stream writing it
   */
  public void putFileBeingWritten(final long inodeId,
      final DFSOutputStream out) {
    synchronized (filesBeingWritten) {
      filesBeingWritten.put(inodeId, out);

      // Only touch the last renewal time when there were no writes before.
      // Once one write stream is open, the lease renewer thread keeps it
      // updated well within anyone's expiration time.
      final boolean neverRenewed = (lastLeaseRenewal == 0);
      if (neverRenewed) {
        updateLastLeaseRenewal();
      }
    }
  }

为该客户端创建一个LeaseRenewer实例,并启动该实例用于客户端的租约更新

  /**
   * Looks up the LeaseRenewer for this client, keyed by the NameNode URI
   * authority (the literal string "null" when there is no URI) and the ugi.
   *
   * @return the renewer returned by LeaseRenewer.getInstance
   */
  public LeaseRenewer getLeaseRenewer() {
    final String authority;
    if (namenodeUri == null) {
      authority = "null";
    } else {
      authority = namenodeUri.getAuthority();
    }
    return LeaseRenewer.getInstance(authority, ugi, this);
  }
  
    /**
     * Called with a running client: starts a new renewer daemon when none is
     * running or the current one has expired, then resets emptyTime to
     * Long.MAX_VALUE. Does nothing when the client is not running.
     *
     * <p>NOTE: this listing is an abridged excerpt; the {@code ...} markers
     * stand for code omitted from the original method.
     *
     * @param dfsc the client requesting lease renewal
     */
    public synchronized void put(final DFSClient dfsc) {
    if (dfsc.isClientRunning()) {
      if (!isRunning() || isRenewerExpired()) {
        // Start a new daemon with a new id.
        final int id = ++currentId;
        daemon = new Daemon(new Runnable() {
          @Override
          public void run() {
            try {
              // Run the renewal loop under the id captured above.
              LeaseRenewer.this.run(id);
            } catch(InterruptedException e) {
              LOG.debug("LeaseRenewer is interrupted.", e);
            } finally {
              // On exit, remove this renewer from the factory, holding the
              // renewer's own lock.
              synchronized(LeaseRenewer.this) {
                Factory.INSTANCE.remove(LeaseRenewer.this);
              } ...
            }
          }
		  ....
          }
        });
        daemon.start();
      }
      emptyTime = Long.MAX_VALUE;
    }
  }
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值