磁盘管理
磁盘管理负责监控 ledger 目录和 index 目录的磁盘使用情况。BookKeeper 中有一个周期性任务来执行目录检测,该任务由 LedgerDirsMonitor 实现:它通过 LedgerDirsManager 检测 ledger/index 磁盘的使用情况,并对每个 LedgerDirsManager 执行下面的检测逻辑:
/**
 * Creates a monitor that checks the directories handled by the given managers.
 *
 * @param conf         server configuration; supplies the disk check interval and the
 *                     minimum usable size required for high-priority writes
 * @param diskChecker  helper that performs the actual disk usage checks
 * @param dirsManagers managers of the ledger/index directories to be monitored
 */
public LedgerDirsMonitor(final ServerConfiguration conf,
                         final DiskChecker diskChecker,
                         final List<LedgerDirsManager> dirsManagers) {
    // Collaborators: the checker does the per-directory measurement, the managers
    // own the ledger/index directory lists.
    this.dirsManagers = dirsManagers;
    this.diskChecker = diskChecker;
    this.conf = conf;

    // Values read once from the configuration.
    // NOTE(review): the original notes state that the high-priority minimum defaults
    // to the space needed to create an entry log file - confirm against ServerConfiguration.
    this.minUsableSizeForHighPriorityWrites = conf.getMinUsableSizeForHighPriorityWrites();
    this.interval = conf.getDiskCheckInterval();
}
...
/**
 * Runs one disk-check pass over the directories managed by {@code ldm}.
 *
 * <p>The pass has two phases: first every currently writable directory is checked
 * and listeners are notified of failures / almost-full / all-full conditions; then
 * every directory currently marked as filled is re-checked and, when allowed,
 * handed back to the manager as writable.
 *
 * @param ldm the directory manager whose directories are checked
 */
private void check(final LedgerDirsManager ldm) {
final ConcurrentMap<File, Float> diskUsages = ldm.getDiskUsages();
try {
List<File> writableDirs = ldm.getWritableLedgerDirs();
// Check the space usage of every currently writable directory.
for (File dir : writableDirs) {
try {
// diskChecker performs the actual check; when checkDir returns normally
// (none of the exceptions below is thrown) the returned usage is recorded.
diskUsages.put(dir, diskChecker.checkDir(dir));
} catch (DiskErrorException e) {
// Disk failure: log it and call diskFailed on every listener.
// NOTE(review): the original notes say this shuts the bookie down by default;
// the listener implementation is not visible here.
LOG.error("Ledger directory {} failed on disk checking : ", dir, e);
for (LedgerDirsListener listener : ldm.getListeners()) {
listener.diskFailed(dir);
}
} catch (DiskWarnThresholdException e) {
// Warn threshold reported: store the usage carried by the exception, logging it
// only when it differs from the previously stored value, then call
// diskAlmostFull on every listener.
// NOTE(review): the original notes say the default listener sets
// shouldCreateNewEntryLog to true - confirm against the listener implementation.
diskUsages.compute(dir, (d, prevUsage) -> {
if (null == prevUsage || e.getUsage() != prevUsage) {
LOG.warn("Ledger directory {} is almost full : usage {}", dir, e.getUsage());
}
return e.getUsage();
});
for (LedgerDirsListener listener : ldm.getListeners()) {
listener.diskAlmostFull(dir);
}
} catch (DiskOutOfSpaceException e) {
// Out of space: store the usage carried by the exception (logging on change),
// then pass the directory to ldm.addToFilledDirs.
// Presumably addToFilledDirs also removes it from the writable list, as the
// original notes state; that method is not shown here.
diskUsages.compute(dir, (d, prevUsage) -> {
if (null == prevUsage || e.getUsage() != prevUsage) {
LOG.error("Ledger directory {} is out-of-space : usage {}", dir, e.getUsage());
}
return e.getUsage();
});
ldm.addToFilledDirs(dir);
}
}
// After all directories are checked, ask for the writable directories again and
// discard the result. Per the original notes this throws
// NoWritableLedgerDirException when no writable directory is left, so the
// condition is detected in this pass instead of waiting for the next one.
ldm.getWritableLedgerDirs();
} catch (NoWritableLedgerDirException e) {
LOG.warn("LedgerDirsMonitor check process: All ledger directories are non writable");
// Start by assuming high-priority writes are still allowed; the lookup below
// revokes that. (Original notes: high-priority writes are operations such as
// compaction and journal replay.)
boolean highPriorityWritesAllowed = true;
try {
// Look for directories above minUsableSizeForHighPriorityWrites; only the
// exception matters here, the returned list is ignored. If the lookup throws
// NoWritableLedgerDirException, high-priority writes are disallowed.
ldm.getDirsAboveUsableThresholdSize(minUsableSizeForHighPriorityWrites, false);
} catch (NoWritableLedgerDirException e1) {
highPriorityWritesAllowed = false;
}
// Notify every listener that all disks are full.
// NOTE(review): the original notes say this turns the bookie read-only by default.
for (LedgerDirsListener listener : ldm.getListeners()) {
listener.allDisksFull(highPriorityWritesAllowed);
}
}
// Work on a snapshot copy of the filled directories for the second phase.
List<File> fullfilledDirs = new ArrayList<File>(ldm.getFullFilledLedgerDirs());
boolean makeWritable = ldm.hasWritableLedgerDirs();
// When bookie is in READONLY mode, i.e there are no writableLedgerDirs:
// - Update fullfilledDirs disk usage.
// - If the total disk usage is below DiskLowWaterMarkUsageThreshold
// add fullfilledDirs back to writableLedgerDirs list if their usage is < conf.getDiskUsageThreshold.
try {
if (!makeWritable) {
// No writable directory: compare the total usage across all ledger directories
// with the configured low water mark; if it is below, filled directories may be
// made writable again.
float totalDiskUsage = diskChecker.getTotalDiskUsage(ldm.getAllLedgerDirs());
if (totalDiskUsage < conf.getDiskLowWaterMarkUsageThreshold()) {
makeWritable = true;
} else {
LOG.debug(
"Current TotalDiskUsage: {} is greater than LWMThreshold: {}."
+ " So not adding any filledDir to WritableDirsList",
totalDiskUsage, conf.getDiskLowWaterMarkUsageThreshold());
}
}
// Re-check every filled directory:
// 1. checkDir returns normally: record the usage and, if makeWritable is set,
//    call addToWritableDirs(dir, true).
// 2. Disk error: call diskFailed on every listener.
// 3. Warn threshold exception: record the usage and, if makeWritable is set,
//    call addToWritableDirs(dir, false).
// 4. Out-of-space exception: record the usage and do nothing else.
for (File dir : fullfilledDirs) {
try {
diskUsages.put(dir, diskChecker.checkDir(dir));
if (makeWritable) {
ldm.addToWritableDirs(dir, true);
}
} catch (DiskErrorException e) {
// Notify disk failure to all the listeners
for (LedgerDirsListener listener : ldm.getListeners()) {
listener.diskFailed(dir);
}
} catch (DiskWarnThresholdException e) {
diskUsages.put(dir, e.getUsage());
// the full-filled dir become writable but still above the warn threshold
if (makeWritable) {
ldm.addToWritableDirs(dir, false);
}
} catch (DiskOutOfSpaceException e) {
// the full-filled dir is still full-filled
diskUsages.put(dir, e.getUsage());
}
}
} catch (IOException ioe) {
// An I/O failure during the second phase is reported to every listener as fatal.
LOG.error("Got IOException while monitoring Dirs", ioe);
for (LedgerDirsListener listener : ldm.getListeners()) {
listener.fatalError();
}
}
}
LedgerDirsMonitor 的检测依赖于LedgerDirsManager和DiskChecker的方法,LedgerDirsManager初始化如下:
/**
 * Creates a manager for the given ledger directories and registers usage gauges.
 *
 * @param conf        server configuration; supplies the entry log size limit and the
 *                    minimum usable sizes for index file / entry log creation
 * @param dirs        the configured ledger directories
 * @param diskChecker helper that performs the actual disk usage checks
 * @param statsLogger stats logger on which the per-directory usage gauges and the
 *                    writable-directory-count gauge are registered
 */
public LedgerDirsManager(ServerConfiguration conf, File[] dirs, DiskChecker diskChecker, StatsLogger statsLogger) {
    // All ledger directories.
    this.ledgerDirectories = Arrays.asList(Bookie
            .getCurrentDirectories(dirs));
    // Writable ledger directories; initially every directory is considered writable.
    this.writableLedgerDirectories = new ArrayList<File>(ledgerDirectories);
    // Directories that have been marked as filled; empty at start.
    this.filledDirs = new ArrayList<File>();
    // Listeners notified during disk checks.
    // NOTE(review): per the original notes they drive actions such as updating the
    // writable/filled lists and switching the bookie from writable to read-only.
    this.listeners = new ArrayList<LedgerDirsListener>();
    this.entryLogSize = conf.getEntryLogSizeLimit();
    // From the configuration Javadoc: gets the minimum safe usable size to be available
    // in index directory for Bookie to create Index File while replaying journal at the
    // time of Bookie Start in Readonly Mode (in bytes).
    this.minUsableSizeForIndexFileCreation = conf.getMinUsableSizeForIndexFileCreation();
    // Minimum usable size for entry log creation, as read from the configuration.
    this.minUsableSizeForEntryLogCreation = conf.getMinUsableSizeForEntryLogCreation();
    // Every directory starts with a recorded usage of 0, and a usage gauge is
    // registered for it on the stats logger.
    for (File dir : ledgerDirectories) {
        diskUsages.put(dir, 0f);
        String statName = "dir_" + dir.getParent().replace('/', '_') + "_usage";
        final File targetDir = dir;
        statsLogger.registerGauge(statName, new Gauge<Number>() {
            @Override
            public Number getDefaultValue() {
                return 0;
            }

            @Override
            public Number getSample() {
                // Recorded usage multiplied by 100.
                return diskUsages.get(targetDir) * 100;
            }
        });
    }
    // Helper that actually performs the directory usage checks.
    this.diskChecker = diskChecker;
    // Gauge reporting the current number of writable ledger directories.
    statsLogger.registerGauge(LD_WRITABLE_DIRS, new Gauge<Number>() {
        @Override
        public Number getDefaultValue() {
            return 0;
        }

        @Override
        public Number getSample() {
            return writableLedgerDirectories.size();
        }
    });
}
实际执行检测的类是 DiskChecker:
/**
 * Creates a checker with the two usage thresholds it enforces.
 *
 * <p>Both values are first passed to {@code validateThreshold} and then stored.
 *
 * <p>NOTE(review): the following comes from the original notes and is not visible
 * in this excerpt - confirm against the configuration defaults:
 * <ul>
 *   <li>{@code threshold} is the maximum disk usage (said to default to 0.95). Once
 *       every directory exceeds it, the bookie switches to read-only if it is
 *       configured to allow that, otherwise it stops.</li>
 *   <li>{@code warnThreshold} is the warning level (said to default to 0.90).
 *       Exceeding it raises {@code DiskWarnThresholdException} and triggers gc; once
 *       usage falls back below it the directory becomes writable again.</li>
 * </ul>
 *
 * @param threshold     maximum allowed disk usage fraction
 * @param warnThreshold disk usage fraction above which a warning is raised
 */
public DiskChecker(float threshold, float warnThreshold) {
    validateThreshold(threshold, warnThreshold);
    this.diskUsageWarnThreshold = warnThreshold;
    this.diskUsageThreshold = threshold;
}
...
/**
 * Measures the used-space fraction of the given directory and enforces both thresholds.
 *
 * <p>If {@code dir} does not exist, its nearest existing ancestor is measured instead;
 * if there is none (or {@code dir} is {@code null}) the result is {@code 0}.
 *
 * @param dir directory to check, may be {@code null}
 * @return the used-space fraction when it exceeds neither threshold
 * @throws DiskOutOfSpaceException    if the used fraction exceeds {@code diskUsageThreshold}
 * @throws DiskWarnThresholdException if the used fraction exceeds {@code diskUsageWarnThreshold}
 *                                    but not {@code diskUsageThreshold}
 */
float checkDiskFull(File dir) throws DiskOutOfSpaceException, DiskWarnThresholdException {
    // Walk up the path until something that exists is found.
    File target = dir;
    while (target != null && !target.exists()) {
        target = target.getParentFile();
    }
    if (target == null) {
        return 0f;
    }

    final long usableBytes = target.getUsableSpace();
    final long totalBytes = target.getTotalSpace();
    final float freeFraction = (float) usableBytes / (float) totalBytes;
    final float usedFraction = 1f - freeFraction;

    // The hard limit is tested first, so the warning can only fire below it.
    if (usedFraction > diskUsageThreshold) {
        LOG.error("Space left on device {} : {}, Used space fraction: {} > threshold {}.",
                target, usableBytes, usedFraction, diskUsageThreshold);
        throw new DiskOutOfSpaceException("Space left on device "
                + usableBytes + " Used space fraction:" + usedFraction + " > threshold " + diskUsageThreshold,
                usedFraction);
    }
    if (usedFraction > diskUsageWarnThreshold) {
        LOG.warn("Space left on device {} : {}, Used space fraction: {} > WarnThreshold {}.",
                target, usableBytes, usedFraction, diskUsageWarnThreshold);
        throw new DiskWarnThresholdException("Space left on device:"
                + usableBytes + " Used space fraction:" + usedFraction + " > WarnThreshold:"
                + diskUsageWarnThreshold,
                usedFraction);
    }
    return usedFraction;
}
检测逻辑主要依据两个参数:磁盘使用率上限(diskUsageThreshold)和磁盘使用告警阈值(diskUsageWarnThreshold)。