Dubbo时间轮算法
什么是时间轮
时间轮,是一种高效的、批量管理定时任务的调度模型。
在Dubbo中,对时间轮的应用主要体现在如下两个方面:
- 失败重试: 例如,Provider 向注册中心进行注册失败时的重试操作,或是 Consumer 向注册中心订阅时的失败重试等;
- 周期性定时任务: 例如,定期发送心跳请求,请求超时的处理,或是网络连接断开后的重连机制。
时间轮算法原理
时间轮是一种环形结构,分为很多槽,一个槽代表一个时间间隔,每个槽使用双向链表存储定时任务,指针周期性地跳动,跳动到一个槽位,就执行该槽位的定时任务。通过时间轮算法,可以将定时任务的存取操作以及取消操作的时间复杂度降为 O(1),非常适合海量定时任务的调度管理。
结构示意图如下:
源码解析
类结构
包路径:org.apache.dubbo.common.timer
该包下一共包含四个核心类:
- Timer:时间轮调度器,该接口提供了两个核心方法:创建任务 newTimeout() 、停止所有未执行任务 stop();
- TimerTask:时间轮任务,所有的定时任务都要实现该接口;
- Timeout:与 TimerTask 对象是一对一的关系,两者的关系类似于线程池返回的 Future 对象与提交到线程池中的任务对象。通过 Timeout 对象,不仅可以查看定时任务的状态,还可以取消定时任务;
- HashedWheelTimer:Timer接口的时间轮算法实现类。
初始化时间轮
初始化参数
- threadFactory:线程工厂,可以指定线程的名称以及是否为守护线程。
- tickDuration:两个 tick 之间的时间间隔。
- unit:tickDuration 的时间单位。
- ticksPerWheel:时间轮里面的 tick 的个数。默认512。
- maxPendingTimeouts:时间轮中最大等待任务的个数。默认-1 不限制个数。
下面看初始化代码
/**
 * Creates a hashed-wheel timer and prepares (but does not start) its worker thread.
 *
 * @param threadFactory      factory used to create the worker thread
 * @param tickDuration       time between two ticks; must be greater than 0
 * @param unit               time unit of {@code tickDuration}
 * @param ticksPerWheel      requested number of wheel slots; rounded up to a power of two
 * @param maxPendingTimeouts maximum number of pending timeouts; the limit is only
 *                           enforced by {@code newTimeout} when this value is positive
 * @throws NullPointerException     if {@code threadFactory} or {@code unit} is {@code null}
 * @throws IllegalArgumentException if {@code tickDuration} or {@code ticksPerWheel} is not
 *                                  positive, or the tick duration in nanoseconds is too large
 */
public HashedWheelTimer(
        ThreadFactory threadFactory,
        long tickDuration, TimeUnit unit, int ticksPerWheel,
        long maxPendingTimeouts) {
    if (threadFactory == null) {
        throw new NullPointerException("threadFactory");
    }
    if (unit == null) {
        throw new NullPointerException("unit");
    }
    if (tickDuration <= 0) {
        throw new IllegalArgumentException("tickDuration must be greater than 0: " + tickDuration);
    }
    if (ticksPerWheel <= 0) {
        throw new IllegalArgumentException("ticksPerWheel must be greater than 0: " + ticksPerWheel);
    }

    // The wheel length is normalized to a power of two by createWheel ...
    wheel = createWheel(ticksPerWheel);
    // ... so a slot index can be computed with a bit mask instead of a modulo.
    mask = wheel.length - 1;

    // Convert tickDuration to nanos.
    this.tickDuration = unit.toNanos(tickDuration);

    // Prevent overflow. The message reports the nanosecond value that was actually compared
    // against the bound, and its parentheses are balanced.
    if (this.tickDuration >= Long.MAX_VALUE / wheel.length) {
        throw new IllegalArgumentException(String.format(
                "tickDuration: %d (expected: 0 < tickDuration in nanos < %d)",
                this.tickDuration, Long.MAX_VALUE / wheel.length));
    }

    // Create the worker thread; it is started elsewhere, not in this constructor.
    workerThread = threadFactory.newThread(worker);

    this.maxPendingTimeouts = maxPendingTimeouts;

    // Report once when the number of live timer instances exceeds INSTANCE_COUNT_LIMIT.
    if (INSTANCE_COUNTER.incrementAndGet() > INSTANCE_COUNT_LIMIT &&
            WARNED_TOO_MANY_INSTANCES.compareAndSet(false, true)) {
        reportTooManyInstances();
    }
}
/**
 * Allocates the bucket array that backs the wheel.
 *
 * @param ticksPerWheel requested number of slots, in the range (0, 2^30]
 * @return a fully populated array whose length is {@code ticksPerWheel} rounded up to a power of two
 * @throws IllegalArgumentException if {@code ticksPerWheel} is outside the accepted range
 */
private static HashedWheelBucket[] createWheel(int ticksPerWheel) {
    if (ticksPerWheel <= 0) {
        throw new IllegalArgumentException(
                "ticksPerWheel must be greater than 0: " + ticksPerWheel);
    }
    if (ticksPerWheel > (1 << 30)) {
        throw new IllegalArgumentException(
                "ticksPerWheel may not be greater than 2^30: " + ticksPerWheel);
    }

    // With a power-of-two length, x % length == x & (length - 1).
    final int slotCount = normalizeTicksPerWheel(ticksPerWheel);
    final HashedWheelBucket[] buckets = new HashedWheelBucket[slotCount];
    int slot = 0;
    while (slot < slotCount) {
        buckets[slot] = new HashedWheelBucket();
        slot++;
    }
    return buckets;
}
/**
 * Rounds {@code ticksPerWheel} up to the nearest power of two.
 *
 * @param ticksPerWheel requested number of slots
 * @return the smallest power of two that is greater than or equal to {@code ticksPerWheel}
 *         for inputs in [1, 2^30]
 */
private static int normalizeTicksPerWheel(int ticksPerWheel) {
    int smeared = ticksPerWheel - 1;
    // Copy the highest set bit into every lower position (shifts of 1, 2, 4, 8, 16),
    // producing a value of the form 2^k - 1.
    for (int shift = 1; shift <= 16; shift <<= 1) {
        smeared |= smeared >>> shift;
    }
    return smeared + 1;
}
其中 mask = wheel.length - 1。mask 用到的地方在 HashedWheelTimer.Worker#run 方法中,用于计算任务在时间轮中的槽位(也就是数组的下标),这个后面会讲到。
开始调用时间轮
开始调用是通过newTimeout方法
/**
 * Schedules {@code task} to run once after the given delay.
 *
 * @param task  the task to execute
 * @param delay how long to wait before the task becomes due
 * @param unit  time unit of {@code delay}
 * @return a handle associated with the scheduled task
 * @throws NullPointerException       if {@code task} or {@code unit} is {@code null}
 * @throws RejectedExecutionException if a positive {@code maxPendingTimeouts} is configured
 *                                    and the pending count would exceed it
 */
public Timeout newTimeout(TimerTask task, long delay, TimeUnit unit) {
    if (task == null) {
        throw new NullPointerException("task");
    }
    if (unit == null) {
        throw new NullPointerException("unit");
    }

    // Reserve a slot in the pending counter first; roll it back if the limit is exceeded.
    final long pendingNow = pendingTimeouts.incrementAndGet();
    final boolean overLimit = maxPendingTimeouts > 0 && pendingNow > maxPendingTimeouts;
    if (overLimit) {
        pendingTimeouts.decrementAndGet();
        throw new RejectedExecutionException("Number of pending timeouts ("
                + pendingNow + ") is greater than or equal to maximum allowed pending "
                + "timeouts (" + maxPendingTimeouts + ")");
    }

    // Make sure the worker is running; startTime is set by the worker thread when it begins.
    start();

    // The deadline is expressed relative to startTime.
    long relativeDeadline = System.nanoTime() + unit.toNanos(delay) - startTime;

    // A positive delay that produced a negative deadline has overflowed; clamp it.
    if (delay > 0 && relativeDeadline < 0) {
        relativeDeadline = Long.MAX_VALUE;
    }

    // Queue the timeout; the worker later moves it from this queue into a wheel bucket.
    final HashedWheelTimeout handle = new HashedWheelTimeout(this, task, relativeDeadline);
    timeouts.add(handle);
    return handle;
}
下面重点看下start方法。
/**
 * Starts the worker thread if it has not been started yet, then blocks until the worker
 * has initialized {@code startTime}.
 *
 * <p>If the calling thread is interrupted while waiting, the wait continues and the
 * thread's interrupt status is restored before this method returns.
 *
 * @throws IllegalStateException if the timer has already been stopped
 */
public void start() {
    // WORKER_STATE_UPDATER tracks the lifecycle of this timer: init, started, shutdown.
    switch (WORKER_STATE_UPDATER.get(this)) {
        case WORKER_STATE_INIT:
            // Only the caller that wins the CAS starts the worker thread.
            if (WORKER_STATE_UPDATER.compareAndSet(this, WORKER_STATE_INIT, WORKER_STATE_STARTED)) {
                workerThread.start();
            }
            break;
        case WORKER_STATE_STARTED:
            break;
        case WORKER_STATE_SHUTDOWN:
            throw new IllegalStateException("cannot be started once stopped");
        default:
            throw new Error("Invalid WorkerState");
    }

    // startTime == 0 means the worker has not initialized it yet; wait on the latch.
    boolean interrupted = false;
    try {
        while (startTime == 0) {
            try {
                startTimeInitialized.await();
            } catch (InterruptedException e) {
                // Keep waiting - it will be ready very soon - but remember the interrupt.
                // Re-asserting it here would make the next await() throw immediately.
                interrupted = true;
            }
        }
    } finally {
        if (interrupted) {
            // Hand the interrupt back to the caller instead of silently dropping it.
            Thread.currentThread().interrupt();
        }
    }
}
其中 startTimeInitialized 是一个 CountDownLatch(门闩),worker 线程启动并完成 startTime 的初始化之后,会调用 countDown,唤醒此处 await 的等待线程。
下面就该看一下时间轮work线程逻辑
工作线程处理流程
下面看HashedWheelTimer.Worker工作线程
/**
 * Runnable executed by the timer's worker thread: it advances the wheel tick by tick,
 * expires due timeouts, and collects the timeouts that were never run once the timer
 * leaves the started state.
 */
private final class Worker implements Runnable {
    private final Set<Timeout> unprocessedTimeouts = new HashSet<>();

    private long tick;

    @Override
    public void run() {
        // Publish the start time. 0 is reserved as the "not initialized" marker,
        // so a reading of exactly 0 is replaced by 1.
        final long now = System.nanoTime();
        startTime = (now == 0) ? 1 : now;

        // Release the threads blocked in start() waiting for startTime.
        startTimeInitialized.countDown();

        // Keep ticking while the timer is in the started state.
        do {
            final long tickDeadline = waitForNextTick();
            if (tickDeadline <= 0) {
                // Nothing to process for this iteration; re-check the state.
                continue;
            }
            // Slot for the current tick.
            final int slot = (int) (tick & mask);
            // Drop timeouts that were cancelled in the meantime.
            processCancelledTasks();
            final HashedWheelBucket dueBucket = wheel[slot];
            // Move queued timeouts into their buckets (at most 100000 per call).
            transferTimeoutsToBuckets();
            // Run everything in this slot that is due.
            dueBucket.expireTimeouts(tickDeadline);
            tick++;
        } while (WORKER_STATE_UPDATER.get(HashedWheelTimer.this) == WORKER_STATE_STARTED);

        // The timer is no longer started: gather the timeouts still sitting in buckets ...
        for (HashedWheelBucket bucket : wheel) {
            bucket.clearTimeouts(unprocessedTimeouts);
        }
        // ... and the non-cancelled ones still waiting in the queue.
        HashedWheelTimeout queued;
        while ((queued = timeouts.poll()) != null) {
            if (!queued.isCancelled()) {
                unprocessedTimeouts.add(queued);
            }
        }
        // Final sweep of cancelled timeouts.
        processCancelledTasks();
    }
}
transferTimeoutsToBuckets 方法的作用是将 timeouts 队列中的任务分发到时间轮的具体槽位中:
/**
 * Moves queued timeouts from {@code timeouts} into their wheel buckets.
 * At most 100000 entries are polled per call; cancelled entries are discarded.
 */
private void transferTimeoutsToBuckets() {
    for (int polled = 0; polled < 100000; polled++) {
        final HashedWheelTimeout candidate = timeouts.poll();
        if (candidate == null) {
            // Queue drained.
            return;
        }
        if (candidate.state() != HashedWheelTimeout.ST_CANCELLED) {
            // Tick on which this timeout becomes due.
            final long dueTick = candidate.deadline / tickDuration;
            // Number of full wheel revolutions still to wait.
            candidate.remainingRounds = (dueTick - tick) / wheel.length;

            // Never schedule into the past: fall back to the current tick.
            final long effectiveTick = Math.max(dueTick, tick);
            final int slot = (int) (effectiveTick & mask);

            wheel[slot].addTimeout(candidate);
        }
        // Otherwise it was cancelled in the meantime and is simply dropped.
    }
}
然后再执行过期方法HashedWheelBucket#expireTimeouts
/**
 * Walks this bucket's list from {@code head}: expires timeouts with no rounds left,
 * removes cancelled ones, and decrements the round counter of the rest.
 *
 * @param deadline deadline of the current tick; a timeout with no rounds left must not
 *                 have a later deadline
 * @throws IllegalStateException if such a timeout has a deadline later than {@code deadline}
 */
void expireTimeouts(long deadline) {
    HashedWheelTimeout current = head;

    while (current != null) {
        HashedWheelTimeout following = current.next;

        if (current.remainingRounds <= 0) {
            // No rounds left: unlink it first, then run it.
            following = remove(current);
            if (current.deadline > deadline) {
                // The timeout was placed into a wrong slot. This should never happen.
                throw new IllegalStateException(String.format(
                        "timeout.deadline (%d) > deadline (%d)", current.deadline, deadline));
            }
            current.expire();
        } else if (current.isCancelled()) {
            // Cancelled while waiting for a later round: just unlink it.
            following = remove(current);
        } else {
            // Still belongs to a later revolution of the wheel.
            current.remainingRounds--;
        }

        current = following;
    }
}
HashedWheelTimeout#expire
/**
 * Runs the task of this timeout, provided its state can be moved from ST_INIT to
 * ST_EXPIRED. Anything thrown by the task is logged at warn level and not rethrown.
 */
public void expire() {
    // Only the transition INIT -> EXPIRED may run the task; otherwise do nothing.
    if (compareAndSetState(ST_INIT, ST_EXPIRED)) {
        try {
            task.run(this);
        } catch (Throwable cause) {
            if (logger.isWarnEnabled()) {
                logger.warn(COMMON_ERROR_RUN_THREAD_TASK, "", "", "An exception was thrown by " + TimerTask.class.getSimpleName() + '.', cause);
            }
        }
    }
}