快照限流器ThroughputSnapshotThrottle

braft中的快照限流器的实现

// snapshot_throttle.h
#ifndef  BRAFT_SNAPSHOT_THROTTLE_H
#define  BRAFT_SNAPSHOT_THROTTLE_H

#include <butil/memory/ref_counted.h>                // butil::RefCountedThreadSafe
#include "util.h"

namespace braft {

// Abstract class with the function of throttling during heavy disk reading/writing.
// It limits how fast snapshot data is processed while the disk is busy.
// Instances are reference counted via butil::RefCountedThreadSafe; copying is
// disabled by DISALLOW_COPY_AND_ASSIGN.
class SnapshotThrottle : public butil::RefCountedThreadSafe<SnapshotThrottle> {
public:
    SnapshotThrottle() {}
    virtual ~SnapshotThrottle() {}
    // Get available throughput after throttled.
    // |bytes| is the amount the caller asks for; the return value is the
    // amount it is allowed to use.
    // Must be thread-safe
    virtual size_t throttled_by_throughput(int64_t bytes) = 0;
    // Asks whether one more task may start. |is_leader| tells the
    // implementation which side is asking. Returns true if the task is admitted.
    virtual bool add_one_more_task(bool is_leader) = 0;
    // Reports that a task previously admitted by |add_one_more_task| is done.
    virtual void finish_one_task(bool is_leader) = 0;
    // Interval in milliseconds; judging by the name, how long a throttled
    // caller should wait before retrying -- NOTE(review): confirm with callers.
    virtual int64_t get_retry_interval_ms() = 0;

    // After a throttled request finish, |return_unused_throughput| is called to
    // return back unused tokens (throughput). Default implementation do nothing.
    // There are two situations we can optimize:
    // case 1: The follower and leader both try to throttle the same request, and
    //         only one of them permit the request. No actual IO and bandwidth consumed,
    //         the acquired tokens are wasted.
    // case 2: We acquired some tokens, but only part of them are used, because of
    //         the file reach the eof, or the file contains holes.
    virtual void return_unused_throughput(
            int64_t acquired, int64_t consumed, int64_t elaspe_time_us) {}
private:
    DISALLOW_COPY_AND_ASSIGN(SnapshotThrottle);
    friend class butil::RefCountedThreadSafe<SnapshotThrottle>;
};

// SnapshotThrottle with throughput threshold used in install_snapshot.
// It caps the throughput of install_snapshot so that a fast snapshot transfer
// does not put too much pressure on the system.
class ThroughputSnapshotThrottle : public SnapshotThrottle {
public:
    // |throttle_throughput_bytes|: throughput threshold, bytes per second.
    // |check_cycle|: number of throughput check cycles per second.
    ThroughputSnapshotThrottle(int64_t throttle_throughput_bytes, int64_t check_cycle);
    int64_t get_throughput() const { return _throttle_throughput_bytes; }
    int64_t get_cycle() const { return _check_cycle; }

    // Given a request of |bytes|, returns how many bytes may be processed now
    // under the per-cycle throughput limit.
    size_t throttled_by_throughput(int64_t bytes);

    // Registers one more install_snapshot task. |is_leader| tells whether the
    // caller is the leader side, which affects the decision.
    bool add_one_more_task(bool is_leader);
    
    // Marks one install_snapshot task as finished.
    void finish_one_task(bool is_leader);

    // Retry interval derived from the check cycle: the length of one cycle in
    // milliseconds (integer division) plus one.
    // NOTE(review): divides by _check_cycle; a zero check_cycle would divide by zero.
    int64_t get_retry_interval_ms() { return 1000 / _check_cycle + 1;}

    // Gives back throughput that was acquired but not consumed, based on the
    // acquired/consumed byte counts and the elapsed time.
    void return_unused_throughput(
            int64_t acquired, int64_t consumed, int64_t elaspe_time_us);

private:
    // Private destructor: friendship declared in SnapshotThrottle suggests the
    // object is destroyed through the ref-counting base -- NOTE(review): confirm.
    ~ThroughputSnapshotThrottle();
    // user defined throughput threshold for raft, bytes per second
    int64_t _throttle_throughput_bytes;
    // user defined check cycles of throughput per second
    int64_t _check_cycle;
    // the num of tasks doing install_snapshot
    int _snapshot_task_num;
    // start timestamp (us) of the cycle currently being accounted
    int64_t _last_throughput_check_time_us;
    // bytes already accounted in the current cycle
    int64_t _cur_throughput_bytes;
    // guards the mutable counters above
    raft_mutex_t _mutex;
};

// Aligns |current_time_us| down to the start of the check cycle that contains it.
//
// |check_cycle| is the number of cycles per second, so one cycle lasts
// 1000 * 1000 / check_cycle microseconds (integer division) and the result is
// the largest multiple of that length not exceeding |current_time_us| (for
// non-negative timestamps).
//
// When no usable cycle length exists -- |check_cycle| is 0, or it is larger
// than 1,000,000 so that the cycle length truncates to 0us -- the timestamp is
// returned unaligned instead of dividing by zero.
inline int64_t caculate_check_time_us(int64_t current_time_us, 
        int64_t check_cycle) {
    const int64_t cycle_length_us =
        check_cycle != 0 ? 1000 * 1000 / check_cycle : 0;
    if (cycle_length_us == 0) {
        // Sub-microsecond (or undefined) cycle: every timestamp is its own
        // cycle boundary.
        return current_time_us;
    }
    return current_time_us / cycle_length_us * cycle_length_us;
}

} //  namespace braft

#endif  // BRAFT_SNAPSHOT_THROTTLE_H

// snapshot_throttle.cpp
#include <butil/time.h>
#include <gflags/gflags.h>
#include <brpc/reloadable_flags.h>
#include "snapshot_throttle.h"
#include "util.h"

namespace braft {

// used to increase throttle threshold dynamically when user-defined
// threshold is too small in extreme cases.
// notice that this flag does not distinguish disk types(sata or ssd, and so on)

// Minimal throttle threshold per second, in MB. throttled_by_throughput()
// multiplies it by 1024 * 1024 and uses the larger of this value and the
// user-defined threshold.
DEFINE_int64(raft_minimal_throttle_threshold_mb, 0,
            "minimal throttle throughput threshold per second");

BRPC_VALIDATE_GFLAG(raft_minimal_throttle_threshold_mb,
                    brpc::NonNegativeInteger);
// Max number of install_snapshot tasks per disk at the same time; read by
// add_one_more_task() as the admission threshold.
DEFINE_int32(raft_max_install_snapshot_tasks_num, 1000, 
             "Max num of install_snapshot tasks per disk at the same time");
BRPC_VALIDATE_GFLAG(raft_max_install_snapshot_tasks_num, 
                    brpc::PositiveInteger);

ThroughputSnapshotThrottle::ThroughputSnapshotThrottle(
        int64_t throttle_throughput_bytes, int64_t check_cycle) 
    : _throttle_throughput_bytes(throttle_throughput_bytes)
    , _check_cycle(check_cycle)
    , _snapshot_task_num(0)
    , _last_throughput_check_time_us(
            caculate_check_time_us(butil::cpuwide_time_us(), check_cycle))
    , _cur_throughput_bytes(0)
{}

// Nothing to release explicitly; members clean up after themselves.
ThroughputSnapshotThrottle::~ThroughputSnapshotThrottle() = default;

// 根据给定的字节数判断是否需要因吞吐量限制而延迟处理
size_t ThroughputSnapshotThrottle::throttled_by_throughput(int64_t bytes) {
    size_t available_size = bytes;              // 有效的字节数
    int64_t now = butil::cpuwide_time_us();     // 当前的cpu时间
    // 限流阈值
    int64_t limit_throughput_bytes_s = std::max(_throttle_throughput_bytes,
                FLAGS_raft_minimal_throttle_threshold_mb * 1024 *1024);
    // 每个周期的限流阈值
    int64_t limit_per_cycle = limit_throughput_bytes_s / _check_cycle;
    std::unique_lock<raft_mutex_t> lck(_mutex);
    if (_cur_throughput_bytes + bytes > limit_per_cycle) {
        // reading another |bytes| excceds the limit
        // 超出阈值需要进行限流
        if (now - _last_throughput_check_time_us <= 
            1 * 1000 * 1000 / _check_cycle) {
            // if time interval is less than or equal to a cycle, read more data
            // to make full use of the throughput of current cycle.
            // 如果时间间隔小于或等于一个周期,读更多数据来充分利用当前周期的吞吐量
            // ps:直接将当前周期的吞吐量拉满
            available_size = limit_per_cycle > _cur_throughput_bytes ? limit_per_cycle - _cur_throughput_bytes : 0;
            _cur_throughput_bytes = limit_per_cycle;
        } else {
            // otherwise, read the data in the next cycle.
            // 在下一个周期来读取
            available_size = bytes > limit_per_cycle ? limit_per_cycle : bytes;
            _cur_throughput_bytes = available_size;
            _last_throughput_check_time_us = 
                caculate_check_time_us(now, _check_cycle);
        }
    } else {
        // reading another |bytes| doesn't excced limit(less than or equal to), 
        // put it in current cycle
        // 不会超过指定的阈值
        available_size = bytes;
        _cur_throughput_bytes += available_size;
    }
    lck.unlock();
    return available_size;
}

// Tries to register one more install_snapshot task.
//
// Leader-side calls are always admitted and are not counted: the leader is not
// throttled here, the follower does it. Rationale noted by the original
// author: the leader drives log replication, heartbeats and snapshot
// distribution for the whole group, so limiting it strictly could slow down
// synchronization for every follower, in particular while it is recovering a
// follower or adding a new node.
//
// Follower-side calls are admitted only while the number of running tasks is
// below FLAGS_raft_max_install_snapshot_tasks_num. Returns true if admitted.
bool ThroughputSnapshotThrottle::add_one_more_task(bool is_leader) {
    if (is_leader) {
        return true;
    }
    const int max_tasks = FLAGS_raft_max_install_snapshot_tasks_num;

    // Decide and update under the lock; log afterwards from the snapshot.
    bool admitted = false;
    int observed_task_num = 0;
    {
        std::unique_lock<raft_mutex_t> guard(_mutex);
        admitted = _snapshot_task_num < max_tasks;
        if (admitted) {
            ++_snapshot_task_num;
        }
        observed_task_num = _snapshot_task_num;
    }

    if (!admitted) {
        LOG(WARNING) << "Fail to add one more task when current task num is: "
                     << observed_task_num << ", task num threshold: " << max_tasks;
        return false;
    }
    LOG(INFO) << "Succed to add one more task, new task num is: " << observed_task_num
              << ", task num threshold: " << max_tasks;
    return true;
}

// Marks one install_snapshot task as finished. Leader-side tasks are never
// counted by add_one_more_task(), so they are ignored here as well.
void ThroughputSnapshotThrottle::finish_one_task(bool is_leader) {
    if (!is_leader) {
        int remaining_tasks = 0;
        {
            std::unique_lock<raft_mutex_t> guard(_mutex);
            remaining_tasks = --_snapshot_task_num;
            // The counter must never drop below zero.
            CHECK_GE(_snapshot_task_num, 0) << "Finishing task cause wrong task num: "
                                            << remaining_tasks;
        }
        LOG(INFO) << "Finish one task, new task num is: " << remaining_tasks;
    }
}

// Gives back throughput that was acquired but not consumed.
//   acquired       - bytes previously granted
//   consumed       - bytes actually used
//   elaspe_time_us - microseconds elapsed since the tokens were acquired
// Tokens acquired before the start of the currently accounted cycle are
// ignored; otherwise the unused amount is subtracted from the current cycle's
// usage, which is clamped so it never goes below zero.
void ThroughputSnapshotThrottle::return_unused_throughput(
            int64_t acquired, int64_t consumed, int64_t elaspe_time_us) {
    const int64_t current_us = butil::cpuwide_time_us();
    std::unique_lock<raft_mutex_t> guard(_mutex);
    const int64_t acquire_time_us = current_us - elaspe_time_us;
    if (acquire_time_us >= _last_throughput_check_time_us) {
        const int64_t unused = acquired - consumed;
        const int64_t remaining = _cur_throughput_bytes - unused;
        _cur_throughput_bytes = remaining > 0 ? remaining : int64_t(0);
    }
}

}  //  namespace braft

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值