HBase Source Code Analysis: Compaction (compact)

This post walks through two methods on HBase's compaction path: the compact() entry point, which drives a single compaction from the selected files to committed store files, and getFileDetails(), which aggregates per-file metadata (sequence IDs, MVCC read points, key counts, timestamps) before the rewrite begins.

<pre name="code" class="java"> public List<StoreFile> compact(CompactionContext compaction,
      CompactionThroughputController throughputController) throws IOException {
    assert compaction != null; // the compaction context must not be null
    List<StoreFile> sfs = null;
    CompactionRequest cr = compaction.getRequest();
    try {
      // Do all sanity checking in here if we have a valid CompactionRequest
      // because we need to clean up after it on the way out in a finally
      // block below   
      long compactionStartTime = EnvironmentEdgeManager.currentTime(); // current time, equivalent to System.currentTimeMillis()
      assert compaction.hasSelection();
      Collection<StoreFile> filesToCompact = cr.getFiles(); // the files selected for compaction
      assert !filesToCompact.isEmpty(); // the selection must not be empty
      synchronized (filesCompacting) {
        // sanity check: we're compacting files that this store knows about
        // TODO: change this to LOG.error() after more debugging
        Preconditions.checkArgument(filesCompacting.containsAll(filesToCompact)); // every selected file must already be tracked in filesCompacting
      }

      // Ready to go. Have list of files to compact. Log the start of the
      // compaction: which files in which region, the temp dir, and the total size.
      LOG.info("Starting compaction of " + filesToCompact.size() + " file(s) in "
          + this + " of " + this.getRegionInfo().getRegionNameAsString()
          + " into tmpdir=" + fs.getTempDir() + ", totalSize="
          + TraditionalBinaryPrefix.long2String(cr.getSize(), "", 1));

      // Commence the compaction.
      List<Path> newFiles = compaction.compact(throughputController);

      // TODO: get rid of this! Controls whether the compaction is committed in full.
      if (!this.conf.getBoolean("hbase.hstore.compaction.complete", true)) {
        LOG.warn("hbase.hstore.compaction.complete is set to false");
        sfs = new ArrayList<StoreFile>(newFiles.size()); // the store files produced by the compaction
        for (Path newFile : newFiles) {
          // Create storefile around what we wrote with a reader on it.
          StoreFile sf = createStoreFileAndReader(newFile); // wrap each compacted file in a StoreFile, close its reader, and collect it into sfs
          sf.closeReader(true);
          sfs.add(sf);
        }
        return sfs;
      }
      // Do the steps necessary to complete the compaction.
      sfs = moveCompatedFilesIntoPlace(cr, newFiles); // move the compacted files into place
      writeCompactionWalRecord(filesToCompact, sfs); // record the compaction in the WAL
      replaceStoreFiles(filesToCompact, sfs);
      if (cr.isMajor()) { // update major vs. minor compaction metrics
        majorCompactedCellsCount += getCompactionProgress().totalCompactingKVs;
        majorCompactedCellsSize += getCompactionProgress().totalCompactedSize;
      } else {
        compactedCellsCount += getCompactionProgress().totalCompactingKVs;
        compactedCellsSize += getCompactionProgress().totalCompactedSize;
      }
      // At this point the store will use new files for all new scanners.
      completeCompaction(filesToCompact, true); // Archive old files & update store size.

      logCompactionEndMessage(cr, sfs, compactionStartTime); // log the end-of-compaction message
      return sfs;
    } finally {
      finishCompactionRequest(cr);
    }
  }</pre>
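The early-return branch guarded by hbase.hstore.compaction.complete is essentially a test hook: with the flag set to false, compact() writes the new files and returns them, but skips the commit steps (moveCompatedFilesIntoPlace, the WAL record, replaceStoreFiles, archiving), so the store keeps serving the old files. Below is a minimal sketch of flipping the flag in a test configuration; the class name is hypothetical and this is not part of the HBase source.

<pre name="code" class="java">import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class CompactionCompleteFlagDemo {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // The flag defaults to true; with false, compact() returns the new store
    // files without committing them to the store.
    conf.setBoolean("hbase.hstore.compaction.complete", false);
    System.out.println("complete? "
        + conf.getBoolean("hbase.hstore.compaction.complete", true)); // prints "complete? false"
  }
}</pre>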

<pre name="code" class="java">protected FileDetails getFileDetails(Collection<StoreFile> filesToCompact,
		boolean allFiles) throws IOException {
		FileDetails fd = new FileDetails();
		// MVCC (Multi-Version Concurrency Control) provides highly concurrent
		// access while preserving data consistency. Each reader sees a
		// consistent snapshot, and a transaction's changes are invisible to
		// others until it commits. Updates do not overwrite old data; they
		// create a new version, so one row may have several stored versions
		// while only the newest is current. A read therefore always sees the
		// data as of its own read point, regardless of later updates or deletes.
		long oldestHFileTimeStampToKeepMVCC = System.currentTimeMillis()
				- (1000L * 60 * 60 * 24 * this.keepSeqIdPeriod); // cutoff for keeping MVCC values: keepSeqIdPeriod days back, from hbase.hstore.compaction.keep.seqId.period (default 5)
		// Only during an all-files compaction, and only for files whose
		// modification timestamp is older than this cutoff, may a file
		// contribute to minSeqIdToKeep (see below).
		for (StoreFile file : filesToCompact) {
			if (allFiles
					&& (file.getModificationTimeStamp() < oldestHFileTimeStampToKeepMVCC)) {
				// When isAllFiles is true, all files are compacted, so we can
				// calculate the smallest MVCC value to keep.
				// MemStore maintains a MemstoreTS counter that increments with
				// each put. For example, putting data1 to column A at timestamp
				// T1 gives MemstoreTS 1; updating that cell means putting data2
				// to column A at T1 with MemstoreTS 2. HBase orders the larger
				// MemstoreTS first, and the entry with the smaller MemstoreTS
				// is filtered out during compaction.
				if (fd.minSeqIdToKeep < file.getMaxMemstoreTS()) {
					fd.minSeqIdToKeep = file.getMaxMemstoreTS();
				}
			}
			long seqNum = file.getMaxSequenceId();
			fd.maxSeqId = Math.max(fd.maxSeqId, seqNum);
			StoreFile.Reader r = file.getReader();
			if (r == null) {
				LOG.warn("Null reader for " + file.getPath());
				continue;
			}
			// NOTE: use getEntries when compacting instead of getFilterEntries;
			// otherwise under-sized blooms can cause progress to be
			// miscalculated, or the user may have switched bloom type
			// (e.g. from ROW to ROWCOL).
			// In HBase the KeyValue is the smallest storage unit: each KeyValue
			// belongs to one column, and its value is that column's value.
			long keyCount = r.getEntries(); // the number of key/value pairs in the file
			fd.maxKeyCount += keyCount;
			// calculate the latest MVCC readpoint in any of the involved store
			// files
			Map<byte[], byte[]> fileInfo = r.loadFileInfo();
			byte tmp[] = null;
			// Get and set the real MVCC read point for bulk-loaded files,
			// which is the SeqId number: take the larger of fd.maxMVCCReadpoint
			// and the file's sequence ID.
			if (r.isBulkLoaded()) {
				fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint,
						r.getSequenceID());
			} else { // otherwise read the largest memstore timestamp (MAX_MEMSTORE_TS_KEY) from the file info
				tmp = fileInfo.get(HFileWriterV2.MAX_MEMSTORE_TS_KEY);
				if (tmp != null) { // maxMVCCReadpoint is the larger of fd.maxMVCCReadpoint and MAX_MEMSTORE_TS_KEY
					fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint,
							Bytes.toLong(tmp));
				}
			}
			tmp = fileInfo.get(FileInfo.MAX_TAGS_LEN);
			if (tmp != null) {
				fd.maxTagsLength = Math.max(fd.maxTagsLength, Bytes.toInt(tmp));
			}
			// If required, calculate the earliest put timestamp of all involved
			// storefiles. This is used to remove family delete markers during
			// compaction.
			long earliestPutTs = 0;
			if (allFiles) {
				tmp = fileInfo.get(StoreFile.EARLIEST_PUT_TS);
				if (tmp == null) {
					// A file with no such info must be an old one; assume it
					// may contain very old puts.
					fd.earliestPutTs = earliestPutTs = HConstants.OLDEST_TIMESTAMP;
				} else {
					earliestPutTs = Bytes.toLong(tmp);
					fd.earliestPutTs = Math
							.min(fd.earliestPutTs, earliestPutTs);
				}
			}
			if (LOG.isDebugEnabled()) {
				LOG.debug("Compacting " + file
						+ ", keycount=" + keyCount
						+ ", bloomtype=" + r.getBloomFilterType().toString()
						+ ", size=" + TraditionalBinaryPrefix.long2String(r.length(), "", 1)
						+ ", encoding=" + r.getHFileReader().getDataBlockEncoding()
						+ ", seqNum=" + seqNum
						+ (allFiles ? ", earliestPutTs=" + earliestPutTs : ""));
			}
		}
		return fd;
	}</pre>
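To make the bookkeeping in getFileDetails() concrete, here is a small standalone sketch that re-derives two of its decisions: whether a file is old enough (older than the keepSeqIdPeriod cutoff) that its MVCC values may be dropped, and how minSeqIdToKeep and maxSeqId are folded across files. The FakeFile class and its field names are hypothetical stand-ins, not HBase types.

<pre name="code" class="java">import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;

public class FileDetailsSketch {
  // Hypothetical stand-in for StoreFile: just the fields getFileDetails() reads.
  static class FakeFile {
    final long modificationTime, maxMemstoreTS, maxSequenceId;
    FakeFile(long mod, long memTS, long seqId) {
      this.modificationTime = mod;
      this.maxMemstoreTS = memTS;
      this.maxSequenceId = seqId;
    }
  }

  public static void main(String[] args) {
    long now = System.currentTimeMillis();
    int keepSeqIdPeriod = 5; // hbase.hstore.compaction.keep.seqId.period, default 5 days
    // Same arithmetic as getFileDetails(): the cutoff is "now minus N days".
    long cutoff = now - TimeUnit.DAYS.toMillis(keepSeqIdPeriod);

    List<FakeFile> files = Arrays.asList(
        new FakeFile(now - TimeUnit.DAYS.toMillis(7), 100, 100),  // older than the cutoff
        new FakeFile(now - TimeUnit.DAYS.toMillis(1), 250, 250)); // inside the keep window

    long minSeqIdToKeep = 0;
    long maxSeqId = -1;
    boolean allFiles = true; // an all-files (major) compaction
    for (FakeFile f : files) {
      // Only files modified before the cutoff may raise minSeqIdToKeep:
      // no reader can still need MVCC versions that old.
      if (allFiles && f.modificationTime < cutoff) {
        minSeqIdToKeep = Math.max(minSeqIdToKeep, f.maxMemstoreTS);
      }
      maxSeqId = Math.max(maxSeqId, f.maxSequenceId);
    }
    // Prints minSeqIdToKeep=100, maxSeqId=250: MVCC info up to 100 can be
    // dropped during the rewrite, while newer versions must be preserved.
    System.out.println("minSeqIdToKeep=" + minSeqIdToKeep + ", maxSeqId=" + maxSeqId);
  }
}</pre>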



                