(6) HDFS Write Flow: Initialization

Overall Flow

[Figure: overall flow of the HDFS write initialization]

Demo

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileTest {

	public static void main(String[] args) throws IOException {
		//How to create a directory
		Configuration configuration = new Configuration();
		//FileSystem client (talks to the NameNode)
		FileSystem fileSystem = FileSystem.newInstance(configuration);
		//TODO Create a directory (this is what the metadata management flow analyzed)
		fileSystem.mkdirs(new Path("/mydir"));   // example path
		/**
		 * TODO Next we analyze the HDFS file upload flow.
		 * TODO Some important initialization work happens here.
		 */
		FSDataOutputStream fsous = fileSystem.create(new Path("/user.txt"));
		//TODO Complete the file upload flow
		fsous.write("fdsafdsafdsafs".getBytes());
		fsous.close();
		//(Aside: MapReduce and Spark follow the same pattern -- the master and workers
		// start up first, then jobs such as wordcount are submitted to the cluster.)
	}

}
  public FSDataOutputStream create(Path f) throws IOException {
    //TODO important
    return create(f, true);
  }

The call ultimately lands in this overload:

 public FSDataOutputStream create(final Path f, final FsPermission permission,
    final EnumSet<CreateFlag> cflags, final int bufferSize,
    final short replication, final long blockSize, final Progressable progress,
    final ChecksumOpt checksumOpt) throws IOException {
    statistics.incrementWriteOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<FSDataOutputStream>() {
      @Override
      public FSDataOutputStream doCall(final Path p)
          throws IOException, UnresolvedLinkException {
    	  //TODO Creates a DFSOutputStream and does a lot of initialization work
        /**
         *  1) Adds an INodeFile to the file directory tree
         *  2) Registers a lease
         *  3) Starts the DataStreamer (the key service of the write path)
         */
        final DFSOutputStream dfsos = dfs.create(getPathName(p), permission,
                cflags, replication, blockSize, progress, bufferSize,
                checksumOpt);
        //TODO FSDataOutputStream wraps the DFSOutputStream once more (decorator pattern)
        return dfs.createWrappedOutputStream(dfsos, statistics);
      }
      @Override
      public FSDataOutputStream next(final FileSystem fs, final Path p)
          throws IOException {
        return fs.create(p, permission, cflags, bufferSize,
            replication, blockSize, progress, checksumOpt);
      }
    }.resolve(this, absF);
  }
Lease management: HDFS files are write-once-read-many, so concurrent writes from multiple clients are not supported.
Some mechanism is therefore needed to make writes to a file mutually exclusive, and HDFS uses a lease mechanism for that.
Before writing a file, the client first asks the NameNode for a lease; the lease holder has write permission on the file for a limited period, and the lease is released when the file is closed.
This guarantees that only one client is writing a given file at any time.
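
As a side note, the idea can be sketched in a few lines of plain Java. This is not HDFS code; the class and method names below (ToyLeaseRegistry, acquire, renew, release) are made up for illustration, with a one-hour hard limit mirroring the HDFS default.

import java.util.HashMap;
import java.util.Map;

// Toy lease registry: at most one holder may write a given path at a time.
public class ToyLeaseRegistry {
	static final long HARD_LIMIT_MS = 60 * 60 * 1000; // 1 hour, mirroring the HDFS hard limit

	static class ToyLease {
		final String holder;
		long lastRenewed = System.currentTimeMillis();
		ToyLease(String holder) { this.holder = holder; }
		boolean expired() { return System.currentTimeMillis() - lastRenewed > HARD_LIMIT_MS; }
	}

	private final Map<String, ToyLease> leasesByPath = new HashMap<>();

	// Called when a client wants to start writing a file.
	public synchronized void acquire(String path, String holder) {
		ToyLease existing = leasesByPath.get(path);
		if (existing != null && !existing.expired() && !existing.holder.equals(holder)) {
			throw new IllegalStateException(path + " is already being written by " + existing.holder);
		}
		leasesByPath.put(path, new ToyLease(holder)); // grant (or re-grant) the lease
	}

	// Called periodically by the writer to keep the lease alive.
	public synchronized void renew(String path, String holder) {
		ToyLease lease = leasesByPath.get(path);
		if (lease != null && lease.holder.equals(holder)) {
			lease.lastRenewed = System.currentTimeMillis();
		}
	}

	// Called when the writer closes the file.
	public synchronized void release(String path) {
		leasesByPath.remove(path);
	}
}

Back in the source, DFSClient.create(...) is the client-side entry point: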
public DFSOutputStream create(String src, FsPermission permission, EnumSet<CreateFlag> flag, boolean createParent,
			short replication, long blockSize, Progressable progress, int buffersize, ChecksumOpt checksumOpt,
			InetSocketAddress[] favoredNodes) throws IOException {
		checkOpen();
		if (permission == null) {
			permission = FsPermission.getFileDefault();
		}
		FsPermission masked = permission.applyUMask(dfsClientConf.uMask);
		if (LOG.isDebugEnabled()) {
			LOG.debug(src + ": masked=" + masked);
		}
		//Important code
		/**
		 * Summary:
		 * 1) Adds the file to the file directory tree
		 * 2) Registers a lease
		 * 3) Starts the DataStreamer
		 */
		final DFSOutputStream result = DFSOutputStream.newStreamForCreate(this, src, masked, flag, createParent,
				replication, blockSize, progress, buffersize, dfsClientConf.createChecksum(checksumOpt),
				getFavoredNodesStr(favoredNodes));
		//TODO start the file lease
		beginFileLease(result.getFileId(), result);
		return result;
	}
static DFSOutputStream newStreamForCreate(DFSClient dfsClient, String src, FsPermission masked,
			EnumSet<CreateFlag> flag, boolean createParent, short replication, long blockSize, Progressable progress,
			int buffersize, DataChecksum checksum, String[] favoredNodes) throws IOException {
		TraceScope scope = dfsClient.getPathTraceScope("newStreamForCreate", src);
		try {
			HdfsFileStatus stat = null;

			// Retry the create if we get a RetryStartFileException up to a maximum
			// number of times
			boolean shouldRetry = true;
			int retryCount = CREATE_RETRY_COUNT;
			
			//TODO retry loop structure
			while (shouldRetry) {
				shouldRetry = false;
				try {
					/**
					 * HDFS in a nutshell:
					 * Creating a directory: add a child node (INodeDirectory) to the directory tree (the metadata).
					 * Uploading a file:
					 *     1) add a child node (INodeFile) to the directory tree
					 *     2) then write data into that file
					 *
					 *  TODO Adds the INodeFile to the directory tree, logs the metadata edit and registers the lease.
					 *       All of this requires talking to the NameNode server side.
					 */
					stat = dfsClient.namenode.create(src, masked, dfsClient.clientName,
							new EnumSetWritable<CreateFlag>(flag), createParent, replication, blockSize,
							SUPPORTED_CRYPTO_VERSIONS);
					break;
				} catch (RemoteException re) {
					IOException e = re.unwrapRemoteException(AccessControlException.class,
							DSQuotaExceededException.class, FileAlreadyExistsException.class,
							FileNotFoundException.class, ParentNotDirectoryException.class,
							NSQuotaExceededException.class, RetryStartFileException.class, SafeModeException.class,
							UnresolvedPathException.class, SnapshotAccessControlException.class,
							UnknownCryptoProtocolVersionException.class);
					if (e instanceof RetryStartFileException) {
						//TODO retry
						if (retryCount > 0) {
							shouldRetry = true;
							retryCount--;
						} else {
							throw new IOException("Too many retries because of encryption" + " zone operations", e);
						}
					} else {
						throw e;
					}
				}
			}
			Preconditions.checkNotNull(stat, "HdfsFileStatus should not be null!");

			//TODO Initializes the DataStreamer, a key object in the data write path
			final DFSOutputStream out = new DFSOutputStream(dfsClient, src, stat, flag, progress, checksum,
					favoredNodes);
			//TODO Starts the DataStreamer
			out.start();
			return out;
		} finally {
			scope.close();
		}
	}

The namenode field here is an RPC proxy, so dfsClient.namenode.create(...) is a remote call into the NameNode.
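The proxy plumbing itself is not shown in these notes. As a rough, Hadoop-independent illustration of what such a stub does, a JDK dynamic proxy can turn an interface call into a message to a server; ToyProtocol and the handler below are purely hypothetical:

import java.lang.reflect.InvocationHandler;
import java.lang.reflect.Proxy;

public class ToyRpcProxyDemo {

	// Stands in for ClientProtocol; the real client holds a proxy of that interface.
	public interface ToyProtocol {
		String create(String src, String clientName);
	}

	public static void main(String[] args) {
		InvocationHandler handler = (proxy, method, methodArgs) -> {
			// A real RPC framework would serialize the call and send it over the network here.
			System.out.println("sending RPC: " + method.getName());
			return "ok"; // pretend this is the server's response
		};
		ToyProtocol namenode = (ToyProtocol) Proxy.newProxyInstance(
				ToyProtocol.class.getClassLoader(),
				new Class<?>[] { ToyProtocol.class },
				handler);
		// Looks like a local method call, but everything goes through the handler.
		System.out.println(namenode.create("/user.txt", "client-1"));
	}
}

On the server side the call lands in NameNodeRpcServer.create(...):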

public HdfsFileStatus create(String src, FsPermission masked,
      String clientName, EnumSetWritable<CreateFlag> flag,
      boolean createParent, short replication, long blockSize, 
      CryptoProtocolVersion[] supportedVersions)
      throws IOException {
	  //Check that the NameNode has started up
    checkNNStartup();
    String clientMachine = getClientMachine();
    if (stateChangeLog.isDebugEnabled()) {
      stateChangeLog.debug("*DIR* NameNode.create: file "
          +src+" for "+clientName+" at "+clientMachine);
    }
    if (!checkPathLength(src)) {
      throw new IOException("create: Pathname too long.  Limit "
          + MAX_PATH_LENGTH + " characters, " + MAX_PATH_DEPTH + " levels.");
    }

    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache, null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return (HdfsFileStatus) cacheEntry.getPayload();
    }

    HdfsFileStatus status = null;
    try {
      PermissionStatus perm = new PermissionStatus(getRemoteUser()
          .getShortUserName(), null, masked);
      //TODO core code for creating the file
      status = namesystem.startFile(src, perm, clientName, clientMachine,
          flag.get(), createParent, replication, blockSize, supportedVersions,
          cacheEntry != null);
    } finally {
      RetryCache.setState(cacheEntry, status != null, status);
    }
    
    metrics.incrFilesCreated();
    metrics.incrCreateFileOps();
    return status;
  }

HdfsFileStatus startFile(String src, PermissionStatus permissions,
      String holder, String clientMachine, EnumSet<CreateFlag> flag,
      boolean createParent, short replication, long blockSize, 
      CryptoProtocolVersion[] supportedVersions, boolean logRetryCache)
      throws AccessControlException, SafeModeException,
      FileAlreadyExistsException, UnresolvedLinkException,
      FileNotFoundException, ParentNotDirectoryException, IOException {

    HdfsFileStatus status = null;
    try {
    	//TODO important
      status = startFileInt(src, permissions, holder, clientMachine, flag,
          createParent, replication, blockSize, supportedVersions,
          logRetryCache);
    } catch (AccessControlException e) {
      logAuditEvent(false, "create", src);
      throw e;
    }
    return status;
  }
private HdfsFileStatus startFileInt(final String srcArg,
      PermissionStatus permissions, String holder, String clientMachine,
      EnumSet<CreateFlag> flag, boolean createParent, short replication,
      long blockSize, CryptoProtocolVersion[] supportedVersions,
      boolean logRetryCache)
      throws AccessControlException, SafeModeException,
      FileAlreadyExistsException, UnresolvedLinkException,
      FileNotFoundException, ParentNotDirectoryException, IOException {
    String src = srcArg;
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      StringBuilder builder = new StringBuilder();
      builder.append("DIR* NameSystem.startFile: src=" + src
              + ", holder=" + holder
              + ", clientMachine=" + clientMachine
              + ", createParent=" + createParent
              + ", replication=" + replication
              + ", createFlag=" + flag.toString()
              + ", blockSize=" + blockSize);
      builder.append(", supportedVersions=");
      if (supportedVersions != null) {
        builder.append(Arrays.toString(supportedVersions));
      } else {
        builder.append("null");
      }
      NameNode.stateChangeLog.debug(builder.toString());
    }
    if (!DFSUtil.isValidName(src)) {
      throw new InvalidPathException(src);
    }
    blockManager.verifyReplication(src, replication, clientMachine);

    boolean skipSync = false;
    HdfsFileStatus stat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    if (blockSize < minBlockSize) {
      throw new IOException("Specified block size is less than configured" +
          " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY
          + "): " + blockSize + " < " + minBlockSize);
    }
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    boolean create = flag.contains(CreateFlag.CREATE);
    boolean overwrite = flag.contains(CreateFlag.OVERWRITE);
    boolean isLazyPersist = flag.contains(CreateFlag.LAZY_PERSIST);
    //Wait for the FSImage (metadata) to finish loading
    waitForLoadingFSImage();

    /**
     * If the file is in an encryption zone, we optimistically create an
     * EDEK for the file by calling out to the configured KeyProvider.
     * Since this typically involves doing an RPC, we take the readLock
     * initially, then drop it to do the RPC.
     * 
     * Since the path can flip-flop between being in an encryption zone and not
     * in the meantime, we need to recheck the preconditions when we retake the
     * lock to do the create. If the preconditions are not met, we throw a
     * special RetryStartFileException to ask the DFSClient to try the create
     * again later.
     */
    CryptoProtocolVersion protocolVersion = null;
    CipherSuite suite = null;
    String ezKeyName = null;
    EncryptedKeyVersion edek = null;

    if (provider != null) {
      readLock();
      try {
 
        src = dir.resolvePath(pc, src, pathComponents);
        INodesInPath iip = dir.getINodesInPath4Write(src);
        // Nothing to do if the path is not within an EZ
        final EncryptionZone zone = dir.getEZForPath(iip);
        if (zone != null) {
          protocolVersion = chooseProtocolVersion(zone, supportedVersions);
          suite = zone.getSuite();
          ezKeyName = zone.getKeyName();

          Preconditions.checkNotNull(protocolVersion);
          Preconditions.checkNotNull(suite);
          Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN),
              "Chose an UNKNOWN CipherSuite!");
          Preconditions.checkNotNull(ezKeyName);
        }
      } finally {
        readUnlock();
      }

      Preconditions.checkState(
          (suite == null && ezKeyName == null) ||
              (suite != null && ezKeyName != null),
          "Both suite and ezKeyName should both be null or not null");

      // Generate EDEK if necessary while not holding the lock
      edek = generateEncryptedDataEncryptionKey(ezKeyName);
      EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();
    }

    // Proceed with the create, using the computed cipher suite and 
    // generated EDEK
    BlocksMapUpdateInfo toRemoveBlocks = null;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create file" + src);
      dir.writeLock();
      try {
    	 //Resolve the path
        src = dir.resolvePath(pc, src, pathComponents);
        final INodesInPath iip = dir.getINodesInPath4Write(src);
        //TODO important code
        toRemoveBlocks = startFileInternal(
            pc, iip, permissions, holder,
            clientMachine, create, overwrite,
            createParent, replication, blockSize,
            isLazyPersist, suite, protocolVersion, edek,
            logRetryCache);
        stat = FSDirStatAndListingOp.getFileInfo(
            dir, src, false, FSDirectory.isReservedRawName(srcArg), true);
      } finally {
        dir.writeUnlock();
      }
    } catch (StandbyException se) {
      skipSync = true;
      throw se;
    } finally {
      writeUnlock();
      // There might be transactions logged while trying to recover the lease.
      // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
        if (toRemoveBlocks != null) {
          removeBlocks(toRemoveBlocks);
          toRemoveBlocks.clear();
        }
      }
    }

    logAuditEvent(true, "create", srcArg, null, stat);
    return stat;
  }
 private BlocksMapUpdateInfo startFileInternal(FSPermissionChecker pc, 
      INodesInPath iip, PermissionStatus permissions, String holder,
      String clientMachine, boolean create, boolean overwrite, 
      boolean createParent, short replication, long blockSize, 
      boolean isLazyPersist, CipherSuite suite, CryptoProtocolVersion version,
      EncryptedKeyVersion edek, boolean logRetryEntry)
      throws IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INode inode = iip.getLastINode();
    final String src = iip.getPath();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException(src +
          " already exists as a directory");
    }

    final INodeFile myFile = INodeFile.valueOf(inode, src, true);
    if (isPermissionEnabled) {
      if (overwrite && myFile != null) {
        dir.checkPathAccess(pc, iip, FsAction.WRITE);
      }
      /*
       * To overwrite existing file, need to check 'w' permission 
       * of parent (equals to ancestor in this case)
       */
      dir.checkAncestorAccess(pc, iip, FsAction.WRITE);
    }
    if (!createParent) {
      dir.verifyParentDir(iip, src);
    }

    FileEncryptionInfo feInfo = null;

    final EncryptionZone zone = dir.getEZForPath(iip);
    if (zone != null) {
      // The path is now within an EZ, but we're missing encryption parameters
      if (suite == null || edek == null) {
        throw new RetryStartFileException();
      }
      // Path is within an EZ and we have provided encryption parameters.
      // Make sure that the generated EDEK matches the settings of the EZ.
      final String ezKeyName = zone.getKeyName();
      if (!ezKeyName.equals(edek.getEncryptionKeyName())) {
        throw new RetryStartFileException();
      }
      feInfo = new FileEncryptionInfo(suite, version,
          edek.getEncryptedKeyVersion().getMaterial(),
          edek.getEncryptedKeyIv(),
          ezKeyName, edek.getEncryptionKeyVersionName());
    }

    try {
      BlocksMapUpdateInfo toRemoveBlocks = null;
      if (myFile == null) {
        if (!create) {
          throw new FileNotFoundException("Can't overwrite non-existent " +
              src + " for client " + clientMachine);
        }
      } else {
        if (overwrite) {
          toRemoveBlocks = new BlocksMapUpdateInfo();
          List<INode> toRemoveINodes = new ChunkedArrayList<INode>();
          long ret = FSDirDeleteOp.delete(dir, iip, toRemoveBlocks,
                                          toRemoveINodes, now());
          if (ret >= 0) {
            iip = INodesInPath.replace(iip, iip.length() - 1, null);
            FSDirDeleteOp.incrDeletedFileCount(ret);
            removeLeasesAndINodes(src, toRemoveINodes, true);
          }
        } else {
          // If lease soft limit time is expired, recover the lease
          recoverLeaseInternal(RecoverLeaseOp.CREATE_FILE,
              iip, src, holder, clientMachine, false);
          throw new FileAlreadyExistsException(src + " for client " +
              clientMachine + " already exists");
        }
      }

      checkFsObjectLimit();
      INodeFile newNode = null;

      // Always do an implicit mkdirs for parent directory tree.
      //e.g. hadoop fs -put a.txt /usr/hive/warehouse/data/a.txt
      //     -> /usr/hive/warehouse/data/ is the parent directory
      //Resolve the directory that the file will be uploaded into
      Map.Entry<INodesInPath, String> parent = FSDirMkdirOp
          .createAncestorDirectories(dir, iip, permissions);
      if (parent != null) {
    	  //TODO Add an INodeFile node to the file directory tree.
          // dir is the FSDirectory (it manages the directory tree)
        iip = dir.addFile(parent.getKey(), parent.getValue(), permissions,
            replication, blockSize, holder, clientMachine);
        newNode = iip != null ? iip.getLastINode().asFile() : null;
      }

      if (newNode == null) {
        throw new IOException("Unable to add " + src +  " to namespace");
      }
      //TODO Register the lease.
      //The lease grants this client exclusive write access to the file.
      leaseManager.addLease(newNode.getFileUnderConstructionFeature()
          .getClientName(), src);

      // Set encryption attributes if necessary
      if (feInfo != null) {
        dir.setFileEncryptionInfo(src, feInfo);
        newNode = dir.getInode(newNode.getId()).asFile();
      }

      setNewINodeStoragePolicy(newNode, iip, isLazyPersist);

      // record file record in log, record new generation stamp
      //TODO Sync the metadata edit to disk; see the metadata management flow for details
      getEditLog().logOpenFile(src, newNode, overwrite, logRetryEntry);
      if (NameNode.stateChangeLog.isDebugEnabled()) {
        NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added " +
            src + " inode " + newNode.getId() + " " + holder);
      }
      return toRemoveBlocks;
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " +
          ie.getMessage());
      throw ie;
    }
  }

Adding the lease
The lease data structures:

public class LeaseManager {
  public static final Log LOG = LogFactory.getLog(LeaseManager.class);

  private final FSNamesystem fsnamesystem;

  private long softLimit = HdfsConstants.LEASE_SOFTLIMIT_PERIOD;
  private long hardLimit = HdfsConstants.LEASE_HARDLIMIT_PERIOD;

  //
  // Used for handling lock-leases
  // Mapping: leaseHolder -> Lease
  //Backed by a red-black tree, so the entries stay sorted
  private final SortedMap<String, Lease> leases = new TreeMap<String, Lease>();
  // Set of: Lease
  private final NavigableSet<Lease> sortedLeases = new TreeSet<Lease>();

  // 
  // Map path names to leases. It is protected by the sortedLeases lock.
  // The map stores pathnames in lexicographical order.
  // Backed by a red-black tree, so the entries stay sorted
  private final SortedMap<String, Lease> sortedLeasesByPath = new TreeMap<String, Lease>();
}
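
The point of keeping leases in sorted structures is that the expiry check (shown further down in checkLeases) only ever needs the oldest lease, which sits at the head of the set. A standalone sketch, with a made-up Lease class ordered by last-update time:

import java.util.TreeSet;

public class SortedLeaseDemo {

	static class Lease implements Comparable<Lease> {
		final String holder;
		long lastUpdate;
		Lease(String holder, long lastUpdate) { this.holder = holder; this.lastUpdate = lastUpdate; }
		@Override public int compareTo(Lease other) {
			// oldest first; tie-break on holder so two distinct leases are never "equal"
			int byTime = Long.compare(lastUpdate, other.lastUpdate);
			return byTime != 0 ? byTime : holder.compareTo(other.holder);
		}
		@Override public String toString() { return holder + "@" + lastUpdate; }
	}

	public static void main(String[] args) {
		TreeSet<Lease> sortedLeases = new TreeSet<>();
		sortedLeases.add(new Lease("clientA", 1_000));
		sortedLeases.add(new Lease("clientB", 5_000));
		sortedLeases.add(new Lease("clientC", 3_000));
		// The expiry monitor only has to look at the head of the set:
		System.out.println("oldest lease: " + sortedLeases.first()); // clientA@1000
	}
}

addLease(...) either creates a new lease for the holder or treats the call as a renewal: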
 synchronized Lease addLease(String holder, String src) {
	 //TODO First check whether this holder already has a lease
    Lease lease = getLease(holder);
    if (lease == null) {
    	//TODO If not, create a new lease
        //(the first time a client comes in, there is no lease yet)
      lease = new Lease(holder);
     //TODO Store it in the (sorted) data structures
      leases.put(holder, lease);
      sortedLeases.add(lease);
    } else {
      //TODO If the holder already has a lease, this is a renewal
      renewLease(lease);
    }
    sortedLeasesByPath.put(src, lease);
    lease.paths.add(src);
    return lease;
  }

Renewal:

  synchronized void renewLease(String holder) {
    renewLease(getLease(holder));
  }
  synchronized void renewLease(Lease lease) {
    if (lease != null) {
      //First remove the lease from the sorted set
      sortedLeases.remove(lease);
      //Update the lease's last-renewal (heartbeat) time
      lease.renew();
      //Then put the updated lease back into the sorted set
      sortedLeases.add(lease);
    }
  }
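
Note the remove / renew / re-add pattern: a TreeSet locates elements through their sort key, so a key must never change while the element is still in the set. A minimal sketch of that idiom with a toy lease type (again made up, mirroring the structure above):

import java.util.Comparator;
import java.util.TreeSet;

public class RemoveBeforeMutateDemo {

	static class ToyLease {
		final String holder;
		long lastUpdate;
		ToyLease(String holder, long lastUpdate) { this.holder = holder; this.lastUpdate = lastUpdate; }
		@Override public String toString() { return holder + "@" + lastUpdate; }
	}

	static final Comparator<ToyLease> OLDEST_FIRST =
			Comparator.comparingLong((ToyLease l) -> l.lastUpdate).thenComparing(l -> l.holder);

	// Same shape as LeaseManager.renewLease(Lease): take it out, touch the timestamp, put it back.
	static void renew(TreeSet<ToyLease> sortedLeases, ToyLease lease) {
		sortedLeases.remove(lease);                     // remove while the old key is still valid
		lease.lastUpdate = System.currentTimeMillis();  // update the ordering field
		sortedLeases.add(lease);                        // re-insert under the new key
	}

	public static void main(String[] args) {
		TreeSet<ToyLease> sortedLeases = new TreeSet<>(OLDEST_FIRST);
		ToyLease a = new ToyLease("clientA", 1_000);
		ToyLease b = new ToyLease("clientB", 2_000);
		sortedLeases.add(a);
		sortedLeases.add(b);

		renew(sortedLeases, a);                         // clientA just sent a heartbeat
		System.out.println(sortedLeases.first());       // clientB@2000 -- now the oldest lease
	}
}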

There is also a background thread that checks whether any lease has expired:

public void run() {
      //effectively an endless while loop
      for(; shouldRunMonitor && fsnamesystem.isRunning(); ) {
        boolean needSync = false;
        try {
          fsnamesystem.writeLockInterruptibly();
          try {
            if (!fsnamesystem.isInSafeMode()) {
              //Check the leases
              needSync = checkLeases();
            }
          } finally {
            fsnamesystem.writeUnlock();
            // lease reassignments should to be sync'ed.
            if (needSync) {
              fsnamesystem.getEditLog().logSync();
            }
          }
          //Check once every 2 seconds
          Thread.sleep(HdfsServerConstants.NAMENODE_LEASE_RECHECK_INTERVAL);
        } catch(InterruptedException ie) {
          if (LOG.isDebugEnabled()) {
            LOG.debug(name + " is interrupted", ie);
          }
        }
      }
    }

checkLeases() checks the leases:

synchronized boolean checkLeases() {
    boolean needSync = false;
    assert fsnamesystem.hasWriteLock();
    Lease leaseToCheck = null;
    try {
       //Take the first lease from the sorted set (i.e. the oldest lease)
      leaseToCheck = sortedLeases.first();
    } catch(NoSuchElementException e) {}

    while(leaseToCheck != null) {
    	//TODO Has the oldest lease passed the hard limit?
      if (!leaseToCheck.expiredHardLimit()) {
    	  //If the oldest lease has not expired, break:
    	  //if even the oldest lease is still valid, the newer ones cannot have expired either,
    	  //so there is no need to keep checking -- no full scan is required.
        break;
      }

      LOG.info(leaseToCheck + " has expired hard limit");

      final List<String> removing = new ArrayList<String>();
      // need to create a copy of the oldest lease paths, because 
      // internalReleaseLease() removes paths corresponding to empty files,
      // i.e. it needs to modify the collection being iterated over
      // causing ConcurrentModificationException
      String[] leasePaths = new String[leaseToCheck.getPaths().size()];
      leaseToCheck.getPaths().toArray(leasePaths);
      for(String p : leasePaths) {
        try {
          INodesInPath iip = fsnamesystem.getFSDirectory().getINodesInPath(p,
              true);
          boolean completed = fsnamesystem.internalReleaseLease(leaseToCheck, p,
              iip, HdfsServerConstants.NAMENODE_LEASE_HOLDER);
          if (LOG.isDebugEnabled()) {
            if (completed) {
              LOG.debug("Lease recovery for " + p + " is complete. File closed.");
            } else {
              LOG.debug("Started block recovery " + p + " lease " + leaseToCheck);
            }
          }
          // If a lease recovery happened, we need to sync later.
          if (!needSync && !completed) {
            needSync = true;
          }
        } catch (IOException e) {
          LOG.error("Cannot release the path " + p + " in the lease "
              + leaseToCheck, e);
          removing.add(p);
        }
      }

      for(String p : removing) {
    	//Remove the expired lease
        removeLease(leaseToCheck, p);
      }
      
      //Since the oldest lease has expired,
      //move on to the second-oldest, then the third-oldest, and so on.
      leaseToCheck = sortedLeases.higher(leaseToCheck);
    }

    try {
      if(leaseToCheck != sortedLeases.first()) {
        LOG.warn("Unable to release hard-limit expired lease: "
          + sortedLeases.first());
      }
    } catch(NoSuchElementException e) {}
    return needSync;
  }

Starting the DataStreamer

//TODO Initializes the DataStreamer, a key object in the data write path
			final DFSOutputStream out = new DFSOutputStream(dfsClient, src, stat, flag, progress, checksum,
					favoredNodes);
			//TODO Starts the DataStreamer
			out.start();
	private DFSOutputStream(DFSClient dfsClient, String src, HdfsFileStatus stat, EnumSet<CreateFlag> flag,
			Progressable progress, DataChecksum checksum, String[] favoredNodes) throws IOException {
		this(dfsClient, src, progress, stat, checksum);
		this.shouldSyncBlock = flag.contains(CreateFlag.SYNC_BLOCK);
        /**
         * TODO File -> block -> packet -> chunk
         *
         * TODO chunk:    512 bytes of data
         * TODO checksum:   4 bytes per chunk
         * TODO packet: 65536 bytes (64 KB)
         */
		computePacketChunkSize(dfsClient.getConf().writePacketSize, bytesPerChecksum);

        //TODO The class-level comment, paraphrased:
	   //The DataStreamer is the service that feeds data into the pipeline.
	   //It asks the NameNode for new blocks; the NameNode replies with the block id and the
	   //DataNodes that should store the block. Once started, the streamer consumes the packet stream;
	   //every packet carries a sequence number, and once all packets of a block have been
	   //acknowledged the current block stream is closed.
		//TODO Create the DataStreamer (the class comment is worth reading)
		streamer = new DataStreamer(stat, null);
		if (favoredNodes != null && favoredNodes.length != 0) {
			streamer.setFavoredNodes(favoredNodes);
		}
	}
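
computePacketChunkSize turns those numbers into a chunks-per-packet figure. A back-of-the-envelope version of that arithmetic (the 33-byte header length is an assumption for illustration; the real value comes from the packet header class):

public class PacketChunkMath {
	public static void main(String[] args) {
		int bytesPerChunk = 512;      // data bytes per chunk (dfs.bytes-per-checksum)
		int checksumSize = 4;         // checksum bytes per chunk
		int writePacketSize = 65536;  // target packet size, 64 KB (dfs.client-write-packet-size)
		int assumedHeaderLen = 33;    // assumption: maximum packet header length

		int chunkOnWire = bytesPerChunk + checksumSize;             // 516 bytes per chunk on the wire
		int bodySize = writePacketSize - assumedHeaderLen;
		int chunksPerPacket = Math.max(bodySize / chunkOnWire, 1);  // ~126 chunks
		int packetSize = chunksPerPacket * chunkOnWire;             // actual payload carried per packet

		System.out.println("chunks per packet = " + chunksPerPacket);
		System.out.println("packet payload    = " + packetSize + " bytes");
	}
}

The DataStreamer's run() loop is where packets are actually pushed down the pipeline: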
	public void run() {
			long lastPacket = Time.monotonicNow();
			TraceScope scope = NullScope.INSTANCE;
			while (!streamerClosed && dfsClient.clientRunning) {
				// if the Responder encountered an error, shutdown Responder
				//On a later pass (after an error) this branch is taken:
				//hasError == true
				if (hasError && response != null) {
					try {
						response.close();
						response.join();
						response = null;
					} catch (InterruptedException e) {
						DFSClient.LOG.warn("Caught exception ", e);
					}
				}

				DFSPacket one;
				try {
					// process datanode IO errors if any
					boolean doSleep = false;
					if (hasError && (errorIndex >= 0 || restartingNodeIndex.get() >= 0)) {
						//When an error has been flagged, execution reaches this point
						doSleep = processDatanodeError();
					}

					synchronized (dataQueue) {
						// wait for a packet to be sent.
						long now = Time.monotonicNow();
						//TODO On the first pass there is no data yet, so execution goes this way:
						// dataQueue.size() == 0 -- wait until data is written into the dataQueue
						while ((!streamerClosed && !hasError && dfsClient.clientRunning && dataQueue.size() == 0
								&& (stage != BlockConstructionStage.DATA_STREAMING
										|| stage == BlockConstructionStage.DATA_STREAMING
												&& now - lastPacket < dfsClient.getConf().socketTimeout / 2))
								|| doSleep) {
							long timeout = dfsClient.getConf().socketTimeout / 2 - (now - lastPacket);
							timeout = timeout <= 0 ? 1000 : timeout;
							timeout = (stage == BlockConstructionStage.DATA_STREAMING) ? timeout : 1000;
							try {
								//TODO If the dataQueue is empty, the thread blocks here
								dataQueue.wait(timeout);
							} catch (InterruptedException e) {
								DFSClient.LOG.warn("Caught exception ", e);
							}
							doSleep = false;
							now = Time.monotonicNow();
						}
						if (streamerClosed || hasError || !dfsClient.clientRunning) {
							continue;
						}
						// get packet to be sent.
						if (dataQueue.isEmpty()) {
							one = createHeartbeatPacket();
							assert one != null;
						} else {
							//TODO Take a packet from the queue
							one = dataQueue.getFirst(); // regular data packet
							long parents[] = one.getTraceParents();
							if (parents.length > 0) {
								scope = Trace.startSpan("dataStreamer", new TraceInfo(0, parents[0]));
								// TODO: use setParents API once it's available from HTrace 3.2
								// scope = Trace.startSpan("dataStreamer", Sampler.ALWAYS);
								// scope.getSpan().setParents(parents);
							}
						}
					}

					// get new block from namenode.
					/**
					 * Set up the data pipeline and
					 * ask the NameNode for a new block
					 */
					if (stage == BlockConstructionStage.PIPELINE_SETUP_CREATE) {
						if (DFSClient.LOG.isDebugEnabled()) {
							DFSClient.LOG.debug("Allocating new block");
						}
						//TODO Step 1: set up the data pipeline
						/**
						 * nextBlockOutputStream does two things:
						 * 1) asks the NameNode for a block
						 * 2) sets up the data pipeline
						 */
						setPipeline(nextBlockOutputStream());
						//Important
						//TODO Step 2: start the ResponseProcessor, which listens for acks to tell whether each packet was delivered
						initDataStreaming();
					} else if (stage == BlockConstructionStage.PIPELINE_SETUP_APPEND) {
						if (DFSClient.LOG.isDebugEnabled()) {
							DFSClient.LOG.debug("Append to block " + block);
						}
						setupPipelineForAppendOrRecovery();
						initDataStreaming();
					}

					long lastByteOffsetInBlock = one.getLastByteOffsetBlock();
					if (lastByteOffsetInBlock > blockSize) {
						throw new IOException("BlockSize " + blockSize + " is smaller than data size. "
								+ " Offset of packet in block " + lastByteOffsetInBlock + " Aborting file " + src);
					}

					if (one.isLastPacketInBlock()) {
						// wait for all data packets have been successfully acked
						synchronized (dataQueue) {
							while (!streamerClosed && !hasError && ackQueue.size() != 0 && dfsClient.clientRunning) {
								try {
									// wait for acks to arrive from datanodes
									dataQueue.wait(1000);
								} catch (InterruptedException e) {
									DFSClient.LOG.warn("Caught exception ", e);
								}
							}
						}
						if (streamerClosed || hasError || !dfsClient.clientRunning) {
							continue;
						}
						stage = BlockConstructionStage.PIPELINE_CLOSE;
					}

					// send the packet
					Span span = null;
					synchronized (dataQueue) {
						// move packet from dataQueue to ackQueue
						if (!one.isHeartbeatPacket()) {
							span = scope.detach();
							one.setTraceSpan(span);
							//TODO Step 3: remove the packet being sent from the dataQueue
							dataQueue.removeFirst();
							//TODO Step 4: then add the packet to the ackQueue
							ackQueue.addLast(one);
							dataQueue.notifyAll();
						}
					}

					if (DFSClient.LOG.isDebugEnabled()) {
						DFSClient.LOG.debug("DataStreamer block " + block + " sending packet " + one);
					}

					// write out data to remote datanode
					TraceScope writeScope = Trace.startSpan("writeTo", span);
					try {
						//This is where the data is actually written out
						one.writeTo(blockStream);
						blockStream.flush();
					} catch (IOException e) {
						// HDFS-3398 treat primary DN is down since client is unable to
						// write to primary DN. If a failed or restarting node has already
						// been recorded by the responder, the following call will have no
						// effect. Pipeline recovery can handle only one node error at a
						// time. If the primary node fails again during the recovery, it
						// will be taken out then.
						//The primary DataNode is the first DataNode in the pipeline
						tryMarkPrimaryDatanodeFailed();
						//rethrow the exception
						throw e;
					} finally {
						writeScope.close();
					}
					lastPacket = Time.monotonicNow();

					// update bytesSent
					long tmpBytesSent = one.getLastByteOffsetBlock();
					if (bytesSent < tmpBytesSent) {
						bytesSent = tmpBytesSent;
					}

					if (streamerClosed || hasError || !dfsClient.clientRunning) {
						continue;
					}

					// Is this block full?
					if (one.isLastPacketInBlock()) {
						// wait for the close packet has been acked
						synchronized (dataQueue) {
							while (!streamerClosed && !hasError && ackQueue.size() != 0 && dfsClient.clientRunning) {
								dataQueue.wait(1000);// wait for acks to arrive from datanodes
							}
						}
						if (streamerClosed || hasError || !dfsClient.clientRunning) {
							continue;
						}

						endBlock();
					}
					if (progress != null) {
						progress.progress();
					}

					// This is used by unit test to trigger race conditions.
					if (artificialSlowdown != 0 && dfsClient.clientRunning) {
						Thread.sleep(artificialSlowdown);
					}
				} catch (Throwable e) {
					// Log warning if there was a real error.
					if (restartingNodeIndex.get() == -1) {
						DFSClient.LOG.warn("DataStreamer Exception", e);
					}
					if (e instanceof IOException) {
						setLastException((IOException) e);
					} else {
						setLastException(new IOException("DataStreamer Exception: ", e));
					}
					//An exception was caught,
					//so set the error flag to true
					hasError = true;
					if (errorIndex == -1 && restartingNodeIndex.get() == -1) {
						// Not a datanode issue
						streamerClosed = true;
					}
				} finally {
					scope.close();
				}
			}
			closeInternal();
		}
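
Structurally, run() is one half of a two-queue producer/consumer: the writing thread enqueues packets into dataQueue, the streamer moves each packet from dataQueue to ackQueue once it has been sent, and the ResponseProcessor removes it from ackQueue when the ack arrives (or pushes it back to dataQueue on failure). A stripped-down sketch of just that hand-off, with Strings standing in for DFSPacket and all pipeline/error handling omitted:

import java.util.LinkedList;

public class TwoQueueSketch {
	private final LinkedList<String> dataQueue = new LinkedList<>();
	private final LinkedList<String> ackQueue = new LinkedList<>();

	// Writer side: DFSOutputStream.write() ultimately queues packets like this.
	public void queuePacket(String packet) {
		synchronized (dataQueue) {
			dataQueue.addLast(packet);
			dataQueue.notifyAll();       // wake the streamer thread
		}
	}

	// Streamer side: block until a packet is available, "send" it, move it to the ackQueue.
	public void streamOnePacket() throws InterruptedException {
		String packet;
		synchronized (dataQueue) {
			while (dataQueue.isEmpty()) {
				dataQueue.wait(1000);    // mirrors dataQueue.wait(timeout) in run()
			}
			packet = dataQueue.removeFirst();
			ackQueue.addLast(packet);    // keep it around until the pipeline acks it
			dataQueue.notifyAll();
		}
		System.out.println("sent " + packet);
	}

	// Responder side: once a packet is acknowledged it can finally be dropped.
	public void ackOnePacket() {
		synchronized (dataQueue) {       // the real code guards both queues with dataQueue's monitor
			if (!ackQueue.isEmpty()) {
				System.out.println("acked " + ackQueue.removeFirst());
				dataQueue.notifyAll();
			}
		}
	}

	public static void main(String[] args) throws InterruptedException {
		TwoQueueSketch s = new TwoQueueSketch();
		s.queuePacket("packet-0");
		s.streamOnePacket();
		s.ackOnePacket();
	}
}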

Back in the earlier method, the file lease is started:

		//Important code
		/**
		 * Summary:
		 * 1) Adds the file to the file directory tree
		 * 2) Registers a lease
		 * 3) Starts the DataStreamer
		 */
		final DFSOutputStream result = DFSOutputStream.newStreamForCreate(this, src, masked, flag, createParent,
				replication, blockSize, progress, buffersize, dfsClientConf.createChecksum(checksumOpt),
				getFavoredNodesStr(favoredNodes));
		//TODO start the file lease
		beginFileLease(result.getFileId(), result);
	private void beginFileLease(final long inodeId, final DFSOutputStream out) throws IOException {
		getLeaseRenewer().put(inodeId, out, this);
	}
synchronized void put(final long inodeId, final DFSOutputStream out,
      final DFSClient dfsc) {
    if (dfsc.isClientRunning()) {
      if (!isRunning() || isRenewerExpired()) {
        //start a new deamon with a new id.
        final int id = ++currentId;
        //Create a background daemon thread
        daemon = new Daemon(new Runnable() {
          @Override
          public void run() {
            try {
              if (LOG.isDebugEnabled()) {
                LOG.debug("Lease renewer daemon for " + clientsString()
                    + " with renew id " + id + " started");
              }
              //The LeaseRenewer is what performs the lease renewals
              LeaseRenewer.this.run(id);
            } catch(InterruptedException e) {
              if (LOG.isDebugEnabled()) {
                LOG.debug(LeaseRenewer.this.getClass().getSimpleName()
                    + " is interrupted.", e);
              }
            } finally {
              synchronized(LeaseRenewer.this) {
                Factory.INSTANCE.remove(LeaseRenewer.this);
              }
              if (LOG.isDebugEnabled()) {
                LOG.debug("Lease renewer daemon for " + clientsString()
                    + " with renew id " + id + " exited");
              }
            }
          }
          
          @Override
          public String toString() {
            return String.valueOf(LeaseRenewer.this);
          }
        });
        daemon.start();
      }
      dfsc.putFileBeingWritten(inodeId, out);
      emptyTime = Long.MAX_VALUE;
    }
  }
 private void run(final int id) throws InterruptedException {
	  //an endless polling loop:
	  //the code checks once every second
    for(long lastRenewed = Time.monotonicNow(); !Thread.interrupted();Thread.sleep(getSleepPeriod())) {
    	//current time minus the time of the last renewal
      final long elapsed = Time.monotonicNow() - lastRenewed;
      //If more than 30 seconds have passed without a renewal
      if (elapsed >= getRenewalTime()) {
        try {
        	//renew the lease
          renew();
          if (LOG.isDebugEnabled()) {
            LOG.debug("Lease renewer daemon for " + clientsString()
                + " with renew id " + id + " executed");
          }
          lastRenewed = Time.monotonicNow();
        } catch (SocketTimeoutException ie) {
          LOG.warn("Failed to renew lease for " + clientsString() + " for "
              + (elapsed/1000) + " seconds.  Aborting ...", ie);
          synchronized (this) {
            while (!dfsclients.isEmpty()) {
              dfsclients.get(0).abort();
            }
          }
          break;
        } catch (IOException ie) {
          LOG.warn("Failed to renew lease for " + clientsString() + " for "
              + (elapsed/1000) + " seconds.  Will retry shortly ...", ie);
        }
      }

      synchronized(this) {
        if (id != currentId || isRenewerExpired()) {
          if (LOG.isDebugEnabled()) {
            if (id != currentId) {
              LOG.debug("Lease renewer daemon for " + clientsString()
                  + " with renew id " + id + " is not current");
            } else {
               LOG.debug("Lease renewer daemon for " + clientsString()
                  + " with renew id " + id + " expired");
            }
          }
          //no longer the current daemon or expired
          return;
        }

        // if no clients are in running state or there is no more clients
        // registered with this renewer, stop the daemon after the grace
        // period.
        if (!clientsRunning() && emptyTime == Long.MAX_VALUE) {
          emptyTime = Time.monotonicNow();
        }
      }
    }
  }
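
For orientation, the timing constants involved look roughly like this (the soft/hard limits mirror HdfsConstants.LEASE_SOFTLIMIT_PERIOD and LEASE_HARDLIMIT_PERIOD in this version; renewing at half the soft limit matches the 30-second check in the loop above):

import java.util.concurrent.TimeUnit;

public class LeaseTiming {
	static final long SOFT_LIMIT_MS = TimeUnit.SECONDS.toMillis(60); // after this, another client may recover the lease
	static final long HARD_LIMIT_MS = TimeUnit.HOURS.toMillis(1);    // after this, the NameNode reclaims it itself
	static final long RENEW_EVERY_MS = SOFT_LIMIT_MS / 2;            // 30 s: renew well before the soft limit

	public static void main(String[] args) {
		long lastRenewed = System.currentTimeMillis() - 31_000;      // pretend 31 s have passed
		long elapsed = System.currentTimeMillis() - lastRenewed;
		if (elapsed >= RENEW_EVERY_MS) {
			System.out.println("time to renew: " + elapsed + " ms since the last renewal");
		}
	}
}

renew() walks every DFSClient registered with this renewer: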

  private void renew() throws IOException {
    final List<DFSClient> copies;
    synchronized(this) {
      copies = new ArrayList<DFSClient>(dfsclients);
    }
    //sort the client names for finding out repeated names.
    Collections.sort(copies, new Comparator<DFSClient>() {
      @Override
      public int compare(final DFSClient left, final DFSClient right) {
        return left.getClientName().compareTo(right.getClientName());
      }
    });
    String previousName = "";
    for(int i = 0; i < copies.size(); i++) {
      final DFSClient c = copies.get(i);
      //skip if current client name is the same as the previous name.
      if (!c.getClientName().equals(previousName)) {
    	  //key code
        if (!c.renewLease()) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Did not renew lease for client " +
                c);
          }
          continue;
        }
        previousName = c.getClientName();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Lease renewed for client " + previousName);
        }
      }
    }
  }
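
One detail worth noting in renew(): the clients are sorted by client name and any entry equal to its predecessor is skipped, so each distinct clientName triggers only one renewLease call per round. The same sort-then-skip-duplicates idiom in isolation:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class DedupBySortDemo {
	public static void main(String[] args) {
		List<String> clientNames = new ArrayList<>(Arrays.asList(
				"DFSClient_2", "DFSClient_1", "DFSClient_2", "DFSClient_3", "DFSClient_1"));
		Collections.sort(clientNames);

		String previousName = "";
		for (String name : clientNames) {
			// Skip if the current name equals the previous one: each client renews only once.
			if (!name.equals(previousName)) {
				System.out.println("renew lease for " + name); // the real code calls c.renewLease() here
				previousName = name;
			}
		}
	}
}

DFSClient.renewLease() finally issues the RPC to the NameNode: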
	boolean renewLease() throws IOException {
		if (clientRunning && !isFilesBeingWrittenEmpty()) {
			try {
				//Renew the lease through the NameNode proxy
				namenode.renewLease(clientName);
				//Update the time of the last renewal
				updateLastLeaseRenewal();
				return true;
			} catch (IOException e) {
				// Abort if the lease has already expired.
				final long elapsed = Time.monotonicNow() - getLastLeaseRenewal();
				if (elapsed > HdfsConstants.LEASE_HARDLIMIT_PERIOD) {
					LOG.warn("Failed to renew lease for " + clientName + " for " + (elapsed / 1000)
							+ " seconds (>= hard-limit =" + (HdfsConstants.LEASE_HARDLIMIT_PERIOD / 1000)
							+ " seconds.) " + "Closing all files being written ...", e);
					closeAllFilesBeingWritten(true);
				} else {
					// Let the lease renewer handle it and retry.
					throw e;
				}
			}
		}
		return false;
	}