初识ClickHouse(下文简称CK)源码,如有纰漏请大家指正
首先在MergeTreeData::MergeTreeData()初始化background_moves_assignee
然后BackgroundJobsAssignee::start()启动任务调度,处理DataProcessing、Moving两种枚举类型的操作
DataProcessing:包括merge操作
Moving:包括ttl move操作、move part/partition操作
本文主要走读Moving部分代码逻辑
switch (type)
{
case Type::DataProcessing:
succeed = data.scheduleDataProcessingJob(*this);
break;
case Type::Moving:
succeed = data.scheduleDataMovingJob(*this);
break;
}
一、选择一些part进行move
auto moving_tagger = selectPartsForMove();
if (moving_tagger->parts_to_move.empty())
return false;
排除正在merge和正在moving的part
获取数据表中所有处于Active状态的part(注意是part,不是partition):getDataPartsVectorForInternalUsage({DataPartState::Active})
然后拿到存储策略和volume等相关信息,判断磁盘剩余空间和ttl策略等信息
遍历数据表的所有part:
- 如果can_move函数判断该part可以移动,则继续后续操作;否则跳过;
- 根据该part的TTL信息选择最适合的TTL规则(ttl_entry);
- 如果存在适合的TTL规则,且该part尚未位于该规则指定的目标位置,则根据规则选择目标位置(destination)并尝试预留空间(reservation);
- 如果成功预留了空间(reservation),则将该part和预留的空间添加到parts_to_move中,并更新计数器和总大小;
- 否则,如果该part所在磁盘在need_to_move中存在对应条目(to_insert),则将该part添加到to_insert中。
std::unordered_map<DiskPtr, LargestPartsWithRequiredSize> need_to_move;
const auto policy = data->getStoragePolicy();
const auto & volumes = policy->getVolumes();
if (!volumes.empty())
{
/// Do not check last volume
for (size_t i = 0; i != volumes.size() - 1; ++i)
{
for (const auto & disk : volumes[i]->getDisks())
{
UInt64 required_maximum_available_space = disk->getTotalSpace() * policy->getMoveFactor();
UInt64 unreserved_space = disk->getUnreservedSpace();
if (unreserved_space < required_maximum_available_space && !disk->isBroken())
need_to_move.emplace(disk, required_maximum_available_space - unreserved_space);
}
}
}
time_t time_of_move = time(nullptr);
auto metadata_snapshot = data->getInMemoryMetadataPtr();
if (need_to_move.empty() && !metadata_snapshot->hasAnyMoveTTL())
return false;
for (const auto & part : data_parts)
{
String reason;
/// Don't report message to log, because logging is excessive.
if (!can_move(part, &reason))
continue;
auto ttl_entry = selectTTLDescriptionForTTLInfos(metadata_snapshot->getMoveTTLs(), part->ttl_infos.moves_ttl, time_of_move, true);
auto to_insert = need_to_move.find(part->volume->getDisk());
ReservationPtr reservation;
if (ttl_entry)
{
auto destination = data->getDestinationForMoveTTL(*ttl_entry);
if (destination && !data->isPartInTTLDestination(*ttl_entry, *part))
reservation = data->tryReserveSpace(part->getBytesOnDisk(), data->getDestinationForMoveTTL(*ttl_entry));
}
if (reservation) /// Found reservation by TTL rule.
{
parts_to_move.emplace_back(part, std::move(reservation));
/// If table TTL rule satisfies on this part, won't apply policy rules on it.
/// In order to not over-move, we need to "release" required space on this disk,
/// possibly to zero.
if (to_insert != need_to_move.end())
{
to_insert->second.decreaseRequiredSizeAndRemoveRedundantParts(part->getBytesOnDisk());
}
++parts_to_move_by_ttl_rules;
parts_to_move_total_size_bytes += part->getBytesOnDisk();
}
else
{
if (to_insert != need_to_move.end())
to_insert->second.add(part);
}
}
遍历need_to_move的所有条目:
- 计算目标卷的最小索引(min_volume_index),即该磁盘所在卷的索引加1;
- 遍历该条目累积的part(getAccumulatedParts()):
  - 根据min_volume_index尝试预留空间(reservation);
  - 如果预留失败,则停止处理该条目剩余的part;
  - 如果成功预留了空间,则将该part和预留的空间添加到parts_to_move中,并更新计数器和总大小。
for (auto && move : need_to_move)
{
auto min_volume_index = policy->getVolumeIndexByDisk(move.first) + 1;
for (auto && part : move.second.getAccumulatedParts())
{
auto reservation = policy->reserve(part->getBytesOnDisk(), min_volume_index);
if (!reservation)
{
break;
}
parts_to_move.emplace_back(part, std::move(reservation));
++parts_to_move_by_policy_rules;
parts_to_move_total_size_bytes += part->getBytesOnDisk();
}
}
至此,就已经拿到需要进行move的part了
二、进行part的move
assignee.scheduleMoveTask(std::make_shared<ExecutableLambdaAdapter>(
[this, moving_tagger] () mutable
{
return moveParts(moving_tagger);
}, moves_assignee_trigger, getStorageID()));
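数据move分为两种情况:一种是表支持副本、目标磁盘支持zero-copy复制且配置中开启了allow_remote_fs_zero_copy_replication的zero-copy模式,另一种是普通模式;两者流程类似,都是先执行clonePart,再执行swapClonedPart,区别在于zero-copy模式下需要先获取排他锁,获取失败则推迟该part的move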
clonePart这里是先对原part进行复制,然后通过swapClonedPart改变新旧数据块状态
try
{
auto disk = moving_part.reserved_space->getDisk();
if (supportsReplication() && disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)
{
if (auto lock = tryCreateZeroCopyExclusiveLock(moving_part.part->name, disk); lock)
{
cloned_part = parts_mover.clonePart(moving_part);
parts_mover.swapClonedPart(cloned_part);
}
else
{
LOG_DEBUG(log, "Move of part {} postponed, because zero copy mode enabled and someone other moving this part right now", moving_part.part->name);
result = false;
continue;
}
}
else /// Ordinary move as it should be
{
cloned_part = parts_mover.clonePart(moving_part);
parts_mover.swapClonedPart(cloned_part);
}
write_part_log({});
}
catch (...)
{
write_part_log(ExecutionStatus::fromCurrentException());
if (cloned_part)
cloned_part->remove();
throw;
}
至此,ttl move操作已经完成;可以了解到ttl move实际上是先对原part进行clone,然后进行新旧part(数据块)的状态切换