<pre name="code" class="java"> public List<StoreFile> compact(CompactionContext compaction,
CompactionThroughputController throughputController) throws IOException {
assert compaction != null;//检查compact上下文是否为空
List<StoreFile> sfs = null;
CompactionRequest cr = compaction.getRequest();;
  try {
    // Do all sanity checking in here if we have a valid CompactionRequest
    // because we need to clean up after it on the way out in a finally
    // block below
    long compactionStartTime = EnvironmentEdgeManager.currentTime(); // get the current time, essentially System.currentTimeMillis()
    assert compaction.hasSelection();
    Collection<StoreFile> filesToCompact = cr.getFiles(); // the files selected for this compaction
    assert !filesToCompact.isEmpty(); // the list of files to compact must not be empty
    synchronized (filesCompacting) {
      // sanity check: we're compacting files that this store knows about
      // TODO: change this to LOG.error() after more debugging
      Preconditions.checkArgument(filesCompacting.containsAll(filesToCompact)); // every file to compact must already be registered in filesCompacting
    }
    // Ready to go. Have list of files to compact. Log that the compaction is starting:
    // which files in which region, the tmp directory being written to, and the total size.
    LOG.info("Starting compaction of " + filesToCompact.size() + " file(s) in "
        + this + " of " + this.getRegionInfo().getRegionNameAsString()
        + " into tmpdir=" + fs.getTempDir() + ", totalSize="
        + TraditionalBinaryPrefix.long2String(cr.getSize(), "", 1));
    // Commence the compaction.
    List<Path> newFiles = compaction.compact(throughputController);
    // TODO: get rid of this! Should the compaction be carried through to completion?
    if (!this.conf.getBoolean("hbase.hstore.compaction.complete", true)) {
      LOG.warn("hbase.hstore.compaction.complete is set to false");
      sfs = new ArrayList<StoreFile>(newFiles.size()); // the files produced by the compaction
      for (Path newFile : newFiles) {
        // Create storefile around what we wrote with a reader on it.
        StoreFile sf = createStoreFileAndReader(newFile); // wrap each new file in a StoreFile, close its reader, and collect it in sfs
        sf.closeReader(true);
        sfs.add(sf);
      }
      return sfs;
    }
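    // [Illustration, not part of the HStore source] hbase.hstore.compaction.complete is essentially a
    // test/debug switch: when it is false, the new files are written and wrapped in StoreFiles above,
    // but never swapped into the store. A sketch of how it could be flipped, either in hbase-site.xml:
    //   <property>
    //     <name>hbase.hstore.compaction.complete</name>
    //     <value>false</value>
    //   </property>
    // or programmatically on a Hadoop Configuration: conf.setBoolean("hbase.hstore.compaction.complete", false);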
    // Do the steps necessary to complete the compaction.
    sfs = moveCompatedFilesIntoPlace(cr, newFiles); // move the compacted files into their final location
    writeCompactionWalRecord(filesToCompact, sfs); // record the compaction in the WAL
    replaceStoreFiles(filesToCompact, sfs);
    if (cr.isMajor()) { // was this a major compaction?
      majorCompactedCellsCount += getCompactionProgress().totalCompactingKVs;
      majorCompactedCellsSize += getCompactionProgress().totalCompactedSize;
    } else {
      compactedCellsCount += getCompactionProgress().totalCompactingKVs;
      compactedCellsSize += getCompactionProgress().totalCompactedSize;
    }
    // At this point the store will use new files for all new scanners.
    completeCompaction(filesToCompact, true); // Archive old files & update store size; the compaction is now complete.
    logCompactionEndMessage(cr, sfs, compactionStartTime); // log the end of the compaction
    return sfs;
  } finally {
    finishCompactionRequest(cr);
  }
}
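// [Illustration, not part of the HStore source] a rough sketch of how a client request can end up in
// compact() above with cr.isMajor() == true; the API names are the standard HBase 1.x client ones and
// "my_table" is just a placeholder:
//
//   try (Connection conn = ConnectionFactory.createConnection(conf);
//        Admin admin = conn.getAdmin()) {
//     admin.majorCompact(TableName.valueOf("my_table")); // asynchronous request to the servers
//   }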
protected FileDetails getFileDetails(Collection<StoreFile> filesToCompact,
    boolean allFiles) throws IOException {
  FileDetails fd = new FileDetails();
  // MVCC (Multi-Version Concurrency Control) provides highly concurrent access while keeping the
  // data consistent. Under MVCC every reader sees a consistent snapshot, and a transaction stays
  // invisible to other users until it commits. An update does not overwrite the old data in place;
  // it writes a new version, so one row may have several stored versions while only the newest is
  // current, and a read always returns the version that was valid at its read point no matter what
  // is modified or deleted afterwards.
  long oldestHFileTimeStampToKeepMVCC = System.currentTimeMillis()
      - (1000L * 60 * 60 * 24 * this.keepSeqIdPeriod); // number of days of MVCC information to keep across compactions, read from hbase.hstore.compaction.keep.seqId.period (default 5)
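  // [Illustration, not part of the HStore source] with the default keepSeqIdPeriod of 5 the cutoff is
  // 1000L * 60 * 60 * 24 * 5 = 432,000,000 ms, i.e. files modified within the last five days still have
  // their MVCC information carried through the compaction.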
  // For an all-files compaction, check whether each file's modification timestamp is older than
  // oldestHFileTimeStampToKeepMVCC.
  for (StoreFile file : filesToCompact) {
    if (allFiles
        && (file.getModificationTimeStamp() < oldestHFileTimeStampToKeepMVCC)) {
      // when isAllFiles is true, all files are compacted so we can
      // calculate the smallest MVCC value to keep.
      // The MemStore keeps a counter, MemstoreTS, that is incremented on every put. For example,
      // putting data1 into column A at timestamp T1 might record MemstoreTS 1; putting data2 into
      // the same column A at the same timestamp T1 then records MemstoreTS 2. HBase sorts the entry
      // with the larger MemstoreTS first, and entries with a smaller MemstoreTS are filtered out
      // during compaction.
      if (fd.minSeqIdToKeep < file.getMaxMemstoreTS()) {
        fd.minSeqIdToKeep = file.getMaxMemstoreTS();
      }
    }
    long seqNum = file.getMaxSequenceId();
    fd.maxSeqId = Math.max(fd.maxSeqId, seqNum);
    StoreFile.Reader r = file.getReader();
    if (r == null) {
      LOG.warn("Null reader for " + file.getPath());
      continue;
    }
    // NOTE: use getEntries when compacting instead of getFilterEntries, otherwise under-sized
    // blooms can cause progress to be miscalculated or if the user switches bloom
    // type (e.g. from ROW to ROWCOL)
    // In HBase the KeyValue is the smallest unit of storage: each KeyValue belongs to exactly one
    // column, and its value is that column's value.
    long keyCount = r.getEntries(); // the number of key/value pairs in the file
    fd.maxKeyCount += keyCount;
    // calculate the latest MVCC readpoint in any of the involved store files
    Map<byte[], byte[]> fileInfo = r.loadFileInfo();
    byte tmp[] = null;
    // Get and set the real MVCCReadpoint for bulk loaded files, which is the SeqId number.
    // For a bulk-loaded file, maxMVCCReadpoint becomes the larger of fd.maxMVCCReadpoint and the
    // file's sequence id.
    if (r.isBulkLoaded()) {
      fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, r.getSequenceID());
    } else { // otherwise read the largest memstore timestamp, MAX_MEMSTORE_TS_KEY, from the file info
      tmp = fileInfo.get(HFileWriterV2.MAX_MEMSTORE_TS_KEY);
      if (tmp != null) { // maxMVCCReadpoint becomes the larger of fd.maxMVCCReadpoint and MAX_MEMSTORE_TS_KEY
        fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, Bytes.toLong(tmp));
      }
    }
    tmp = fileInfo.get(FileInfo.MAX_TAGS_LEN);
    if (tmp != null) {
      fd.maxTagsLength = Math.max(fd.maxTagsLength, Bytes.toInt(tmp));
    }
    // If required, calculate the earliest put timestamp of all involved storefiles.
    // This is used to remove family delete markers during compaction.
    long earliestPutTs = 0;
    if (allFiles) {
      tmp = fileInfo.get(StoreFile.EARLIEST_PUT_TS);
      if (tmp == null) {
        // There's a file with no information, must be an old one
        // assume we have very old puts
        fd.earliestPutTs = earliestPutTs = HConstants.OLDEST_TIMESTAMP;
      } else {
        earliestPutTs = Bytes.toLong(tmp);
        fd.earliestPutTs = Math.min(fd.earliestPutTs, earliestPutTs);
      }
    }
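    // [Illustration, not part of the HStore source] a family delete marker is what a client writes with
    // something along the lines of:
    //   Delete d = new Delete(rowKey);           // rowKey is a placeholder byte[]
    //   d.addFamily(Bytes.toBytes("cf"));        // marks the whole column family as deleted
    //   table.delete(d);
    // Such a marker can only be dropped when the compaction covers all files (allFiles) and the
    // earliest put timestamp shows that nothing older remains for the marker to mask.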
    if (LOG.isDebugEnabled()) {
      LOG.debug("Compacting " + file
          + ", keycount=" + keyCount
          + ", bloomtype=" + r.getBloomFilterType().toString()
          + ", size=" + TraditionalBinaryPrefix.long2String(r.length(), "", 1)
          + ", encoding=" + r.getHFileReader().getDataBlockEncoding()
          + ", seqNum=" + seqNum
          + (allFiles ? ", earliestPutTs=" + earliestPutTs : ""));
    }
  }
  return fd;
}
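// [Illustration, not part of the HStore source] the per-file metadata read above (MAX_MEMSTORE_TS_KEY,
// MAX_TAGS_LEN, EARLIEST_PUT_TS, ...) is stored in the HFile's file-info block; it can also be inspected
// offline with the HFile pretty-printer shipped with HBase 1.x, e.g.:
//   hbase org.apache.hadoop.hbase.io.hfile.HFile -m -f <path-to-hfile>
// where -m prints the file-info metadata and -f names the file to read.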