指数退避算法
退避算法是以冲突窗口大小为基准的,每个节点有一个冲突计数器C。退避的时间与冲突次数具有指数关系,冲突次数越多,退避的时间就可能越长,若达到限定的冲突次数,该节点就停止发送数据。
下面举例说明二进制指数退避算法。
其算法过程如下:
- 确定基本退避时间,一般为端到端的往返时间为2t,2t也称为冲突窗口或争用期。
- 定义参数k(factor)作为指标,k与冲突次数有关,规定k不能超过10。在冲突次数大于10,小于16时,k不再增大,一直取值为10。
- 从离散的整数集合[0,1,2,……,( $ 2^k-1 $)]中随机的取出一个数r,等待的时延为r倍的基本退避时间,等于
r x 2t
。(r的取值范围与冲突次数k有关,r可选的随机取值为 2 k 2^k 2k个、这也是称为二进制退避算法的起因。) - 当冲突次数大于10以后,都是从$ 0- 2^{10} - 1 $个2t中随机选择一个作为等待时间。
- 当冲突次数超过16次后,发送失败,丢弃传输的帧,发送错误报告。
具体实现
数据库创建参数
CREATE TABLE `t_transactional_message`{
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
create_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
edit_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
creator VARCHAR(20) NOT NULL DEFAULT 'admin',
editor VARCHAR(20) NOT NULL DEFAULT 'admin',
deleted TINYINT NOT NULL DEFAULT 0,
current_retry_times TINYINT NOT NULL DEFAULT 0 COMMENT '当前重试次数',
max_retry_times TINYINT NOT NULL DEFAULT 0 COMMENT '最大重试次数',
next_schedule_time DATETIME NOT NULL COMMENT '下一次调度时间',
message_status TINYINT NOT NULL DEFAULT 0 COMMENT '消息状态',
init_backoff BIGINT UNSIGNED NOT NULL DEFAULT 5 COMMENT '退避初始化值',
backoff_factor TINYINT NOT NULL DEFAULT 2 COMMENT '退避因子(也就是指数)',
}
//参考存储对象
@Data
public class TransactionalMessage {
private Long id;
private LocalDateTime createTime;
private LocalDateTime editTime;
private String creator;
private String editor;
private Integer deleted;
private Integer currentRetryTimes;
private Integer maxRetryTimes;
private String queueName;
private String exchangeName;
private String exchangeType;
private String routingKey;
private String businessModule;
private String businessKey;
private LocalDateTime nextScheduleTime;
private Integer messageStatus;
private Long initBackoff;
private Integer backoffFactor;
}
在需要的错误重试的地方,初始算法,存入到数据库或者redis中
private static final long DEFAULT_INIT_BACKOFF = 10L;
private static final int DEFAULT_BACKOFF_FACTOR = 2;
private static final int DEFAULT_MAX_RETRY_TIMES = 5;
private static final int LIMIT = 100;
//数据库初始保存代码
public void saveTransactionalMessageRecord(TransactionalMessage record, String content) {
record.setMessageStatus(TxMessageStatus.PENDING.getStatus());
double delta = DEFAULT_INIT_BACKOFF * Math.pow(DEFAULT_BACKOFF_FACTOR, 0);
LocalDateTime nextScheduleTime =LocalDateTime.now().plusSeconds((long) delta);
record.setNextScheduleTime(nextScheduleTime);
record.setCurrentRetryTimes(0);
record.setInitBackoff(DEFAULT_INIT_BACKOFF);
record.setBackoffFactor(DEFAULT_BACKOFF_FACTOR);
record.setMaxRetryTimes(DEFAULT_MAX_RETRY_TIMES);
messageDao.insertSelective(record);
TransactionalMessageContent messageContent = new TransactionalMessageContent();
messageContent.setContent(content);
messageContent.setMessageId(record.getId());
contentDao.insert(messageContent);
}
// 接着在完成后,注册成功,或者失败 ,
//发送成功
private void markSuccess(TransactionalMessage record) {
// 标记下一次执行时间为最大值
record.setNextScheduleTime(null);
record.setCurrentRetryTimes(record.getCurrentRetryTimes().compareTo(record.getMaxRetryTimes()) >= 0 ?
record.getMaxRetryTimes() : record.getCurrentRetryTimes() + 1);
record.setMessageStatus(TxMessageStatus.SUCCESS.getStatus());
record.setEditTime(LocalDateTime.now());
messageDao.updateStatusSelective(record);
}
private void markFail(TransactionalMessage record, Exception e) {
log.error("发送消息失败,目标队列:{}", record.getQueueName(), e);
record.setCurrentRetryTimes(record.getCurrentRetryTimes().compareTo(record.getMaxRetryTimes()) >= 0 ?
record.getMaxRetryTimes() : record.getCurrentRetryTimes() + 1);
// 计算下一次的执行时间
LocalDateTime nextScheduleTime =
double delta = record.getInitBackoff() * Math.pow(record.getBackoffFactor(), record.getCurrentRetryTimes());
record.getNextScheduleTime().plusSeconds((long) delta);
LocalDateTime nextScheduleTime = record.getNextScheduleTime().plusSeconds((long) delta);
record.setNextScheduleTime(nextScheduleTime);
record.setMessageStatus(TxMessageStatus.FAIL.getStatus());
record.setEditTime(LocalDateTime.now());
messageDao.updateStatusSelective(record);
}
如何注册,消息队列可以参考使用TransactionSynchronizationManager.registerSynchronization
的 aftercommit
进行注册,参考代码
TransactionSynchronizationManager.registerSynchronization(new TransactionSynchronizationAdapter() {
@Override
public void afterCommit() {
try {
rabbitTemplate.convertAndSend(record.getExchangeName(), record.getRoutingKey(), content);
if (log.isDebugEnabled()) {
log.debug("发送消息成功,目标队列:{},消息内容:{}", record.getQueueName(), content);
}
// 标记成功
markSuccess(record);
} catch (Exception e) {
// 标记失败
markFail(record, e);
}
});
还要配合定时事务,不断地从数据库或者redis 中定初始间隔时间2t
继续 轮询查看是否该项事务完成。
@Slf4j
@RequiredArgsConstructor
@Configuration
@EnableScheduling
public class ScheduleJobAutoConfiguration {
private final TransactionalMessageManagementService managementService;
/**
* 这里用的是本地的Redis,实际上要做成配置
*/
private final RedissonClient redisson = Redisson.create();
@Scheduled(fixedDelay = 10000)
public void transactionalMessageCompensationTask() throws Exception {
RLock lock = redisson.getLock("transactionalMessageCompensationTask");
// 等待时间5秒,预期300秒执行完毕,这两个值需要按照实际场景定制
boolean tryLock = lock.tryLock(5, 300, TimeUnit.SECONDS);
if (tryLock) {
try {
long start = System.currentTimeMillis();
log.info("开始执行事务消息推送补偿定时任务...");
processPendingCompensationRecords();
long end = System.currentTimeMillis();
long delta = end - start;
// 以防锁过早释放
if (delta < 5000) {
Thread.sleep(5000 - delta);
}
log.info("执行事务消息推送补偿定时任务完毕,耗时:{} ms...", end - start);
} finally {
lock.unlock();
}
}
}
}
/**
* 推送补偿 - 里面的参数应该根据实际场景定制
*/
public void processPendingCompensationRecords() {
// 这里预防把刚保存的消息也推送了
LocalDateTime max = LocalDateTime.now().plusSeconds(-DEFAULT_INIT_BACKOFF);
LocalDateTime min = max.plusHours(-1);
//jdbcTemplate.query("SELECT * FROM t_transactional_message WHERE next_schedule_time >= ? " +
// "AND next_schedule_time <= ? AND message_status <> ? AND current_retry_times < max_retry_times LIMIT ?",
// p -> {
// p.setTimestamp(1, Timestamp.valueOf(minScheduleTime));
// p.setTimestamp(2, Timestamp.valueOf(maxScheduleTime));
// p.setInt(3, TxMessageStatus.SUCCESS.getStatus());
// p.setInt(4, limit);
// },
// MULTI);
Map<Long, TransactionalMessage> collect = messageDao.queryPendingCompensationRecords(min, max, LIMIT)
.stream()
.collect(Collectors.toMap(TransactionalMessage::getId, x -> x));
if (!collect.isEmpty()) {
StringJoiner joiner = new StringJoiner(",", "(", ")");
collect.keySet().forEach(x -> joiner.add(x.toString()));
contentDao.queryByMessageIds(joiner.toString())
.forEach(item -> {
// 重试发送
// TransactionalMessage message = collect.get(item.getMessageId());
// sendMessageSync(message, item.getContent());
});
}
}