19.1 接收并存储
上一章讲了客户端的处理,这一章讲 Broker 端的处理。由上一章可知,消息重推使用的请求码为 RequestCode.CONSUMER_SEND_MSG_BACK,据此在源码中可以直接定位到 SendMessageProcessor.processRequest 方法:
/**
 * Entry point for send-type requests handled by this processor.
 *
 * <p>A {@code CONSUMER_SEND_MSG_BACK} request (a consumer pushing a message back for
 * retry) is routed to {@code consumerSendMsgBack}; every other request code is treated
 * as an ordinary message send, wrapped by the send-message before/after hooks.
 *
 * @param ctx     channel context of the requesting connection
 * @param request the incoming request command
 * @return the response command, or {@code null} when the send request header cannot be parsed
 * @throws RemotingCommandException declared by the handlers invoked here
 */
public RemotingCommand processRequest(ChannelHandlerContext ctx,
    RemotingCommand request) throws RemotingCommandException {
    // Retry push-back from a consumer: handled on its own path.
    if (request.getCode() == RequestCode.CONSUMER_SEND_MSG_BACK) {
        return this.consumerSendMsgBack(ctx, request);
    }

    // Everything else is a normal message send.
    final SendMessageRequestHeader requestHeader = parseRequestHeader(request);
    if (requestHeader == null) {
        return null;
    }

    final SendMessageContext traceContext = buildMsgContext(ctx, requestHeader);
    this.executeSendMessageHookBefore(ctx, request, traceContext);

    // Batch and single sends share the same hook handling; only the store call differs.
    final RemotingCommand response = requestHeader.isBatch()
        ? this.sendBatchMessage(ctx, request, traceContext, requestHeader)
        : this.sendMessage(ctx, request, traceContext, requestHeader);

    this.executeSendMessageHookAfter(response, traceContext);
    return response;
}
SendMessageProcessor.consumerSendMsgBack
/**
 * Handles a CONSUMER_SEND_MSG_BACK request: re-stores a message that a consumer sent back,
 * either under the consumer group's retry topic (with a delay level) or, once the retry
 * limit is reached or the client passes a negative delay level, under the group's
 * dead-letter (DLQ) topic.
 *
 * @param ctx     channel context of the requesting connection (not used in this method body)
 * @param request the send-back request; its custom header is decoded as ConsumerSendMsgBackRequestHeader
 * @return a response whose code is SUCCESS, SUBSCRIPTION_GROUP_NOT_EXIST, NO_PERMISSION or SYSTEM_ERROR
 * @throws RemotingCommandException declared by the signature; presumably raised when the header cannot be decoded
 */
private RemotingCommand consumerSendMsgBack(final ChannelHandlerContext ctx, final RemotingCommand request)
throws RemotingCommandException {
// Response command that is filled in and returned on every path below.
final RemotingCommand response = RemotingCommand.createResponseCommand(null);
// Decode the request's custom header.
final ConsumerSendMsgBackRequestHeader requestHeader =
(ConsumerSendMsgBackRequestHeader)request.decodeCommandCustomHeader(ConsumerSendMsgBackRequestHeader.class);
// Namespace derived from the consumer group name.
String namespace = NamespaceUtil.getNamespaceFromResource(requestHeader.getGroup());
// Hook invocation (can be skipped on a first read): runs only when a consume-message hook
// exists and the header carries an origin message id.
if (this.hasConsumeMessageHook() && !UtilAll.isBlank(requestHeader.getOriginMsgId())) {
ConsumeMessageContext context = new ConsumeMessageContext();
context.setNamespace(namespace);
context.setConsumerGroup(requestHeader.getGroup());
context.setTopic(requestHeader.getOriginTopic());
context.setCommercialRcvStats(BrokerStatsManager.StatsType.SEND_BACK);
context.setCommercialRcvTimes(1);
context.setCommercialOwner(request.getExtFields().get(BrokerStatsManager.COMMERCIAL_OWNER));
this.executeConsumeMessageHookAfter(context);
}
// Look up the subscription group config for this consumer group
// (the lookup may create it locally, per the article author's note).
SubscriptionGroupConfig subscriptionGroupConfig =
this.brokerController.getSubscriptionGroupManager().findSubscriptionGroupConfig(requestHeader.getGroup());
// Unknown subscription group: reject.
if (null == subscriptionGroupConfig) {
response.setCode(ResponseCode.SUBSCRIPTION_GROUP_NOT_EXIST);
response.setRemark("subscription group not exist, " + requestHeader.getGroup() + " "
+ FAQUrl.suggestTodo(FAQUrl.SUBSCRIPTION_GROUP_NOT_EXIST));
return response;
}
// The broker itself must be writable.
if (!PermName.isWriteable(this.brokerController.getBrokerConfig().getBrokerPermission())) {
response.setCode(ResponseCode.NO_PERMISSION);
response.setRemark("the broker[" + this.brokerController.getBrokerConfig().getBrokerIP1() + "] sending message is forbidden");
return response;
}
// With no retry queues configured, reply SUCCESS without storing anything:
// the message is simply not retried.
if (subscriptionGroupConfig.getRetryQueueNums() <= 0) {
response.setCode(ResponseCode.SUCCESS);
response.setRemark(null);
return response;
}
// Retry topic for the group ("%RETRY%" + group, per the article author's note):
// all retry messages of one consumer group share this single topic.
String newTopic = MixAll.getRetryTopic(requestHeader.getGroup());
// Random queue id in [0, retryQueueNums); with a single retry queue this is always 0.
int queueIdInt = Math.abs(this.random.nextInt() % 99999999) % subscriptionGroupConfig.getRetryQueueNums();
int topicSysFlag = 0;
if (requestHeader.isUnitMode()) {// false by default, per the article author's note
topicSysFlag = TopicSysFlag.buildSysFlag(false, true);
}
// Get or create the retry topic's config (readable and writable).
TopicConfig topicConfig = this.brokerController.getTopicConfigManager().createTopicInSendMessageBackMethod(
newTopic,
subscriptionGroupConfig.getRetryQueueNums(),
PermName.PERM_WRITE | PermName.PERM_READ, topicSysFlag);
// Topic config could not be obtained: fail.
if (null == topicConfig) {
response.setCode(ResponseCode.SYSTEM_ERROR);
response.setRemark("topic[" + newTopic + "] not exist");
return response;
}
// The retry topic must be writable.
if (!PermName.isWriteable(topicConfig.getPerm())) {
response.setCode(ResponseCode.NO_PERMISSION);
response.setRemark(String.format("the topic[%s] sending message is forbidden", newTopic));
return response;
}
// Load the message being sent back from the message store, using the offset carried in the
// header (the commitlog physical offset, per the article author's note).
MessageExt msgExt = this.brokerController.getMessageStore().lookMessageByOffset(requestHeader.getOffset());
if (null == msgExt) {
response.setCode(ResponseCode.SYSTEM_ERROR);
response.setRemark("look message by offset failed, " + requestHeader.getOffset());
return response;
}
// Read the original topic recorded in the message properties.
final String retryTopic = msgExt.getProperty(MessageConst.PROPERTY_RETRY_TOPIC);
if (null == retryTopic) {// not recorded yet (presumably the first send-back of this message)
// Record the loaded message's own topic as the original topic.
MessageAccessor.putProperty(msgExt, MessageConst.PROPERTY_RETRY_TOPIC, msgExt.getTopic());
}
msgExt.setWaitStoreMsgOK(false);
// Delay level requested by the client (the redelivery policy covered in chapter 18 of the article).
int delayLevel = requestHeader.getDelayLevel();
// Max retry count from the subscription group config (16 by default, per the article author's note).
int maxReconsumeTimes = subscriptionGroupConfig.getRetryMaxTimes();
// Clients at version V3_4_9 or later send their own limit in the header; prefer it.
if (request.getVersion() >= MQVersion.Version.V3_4_9.ordinal()) {
maxReconsumeTimes = requestHeader.getMaxReconsumeTimes();
}
// Retry limit reached, or the client explicitly asked for no retry: route to the dead-letter queue.
if (msgExt.getReconsumeTimes() >= maxReconsumeTimes
|| delayLevel < 0) { // a negative level means "do not retry, go straight to the DLQ"
// DLQ topic derived from the consumer group name.
newTopic = MixAll.getDLQTopic(requestHeader.getGroup());
// Random queue id in [0, DLQ_NUMS_PER_GROUP); with a single DLQ queue this is always 0.
queueIdInt = Math.abs(this.random.nextInt() % 99999999) % DLQ_NUMS_PER_GROUP;
// Get or create the DLQ topic's config. Note it is created with PERM_WRITE only (no read permission).
topicConfig = this.brokerController.getTopicConfigManager().createTopicInSendMessageBackMethod(newTopic,
DLQ_NUMS_PER_GROUP,
PermName.PERM_WRITE, 0
);
if (null == topicConfig) { // topic config could not be obtained: fail
response.setCode(ResponseCode.SYSTEM_ERROR);
response.setRemark("topic[" + newTopic + "] not exist");
return response;
}
} else {
if (0 == delayLevel) { // 0 means the broker chooses the delay level
// Start at level 3 and go up one level per retry already made. With the default
// level table ("1s 5s 10s 30s 1m ...") level 3 is 10s, so the first retry waits 10s.
delayLevel = 3 + msgExt.getReconsumeTimes();
}
// Apply the delay level to the message.
msgExt.setDelayTimeLevel(delayLevel);
}
// Build a new message and put it into the commitlog again through the normal store/dispatch path.
// Every retry therefore creates another commitlog copy of the message. Advantages: (1) the whole
// send/store/dispatch/consume pipeline stays unchanged, which keeps the code simple to maintain;
// (2) commitlog writes stay sequential, which is good for disk IO. Drawback: extra copies take
// extra space, but retry messages are a small minority in practice.
MessageExtBrokerInner msgInner = new MessageExtBrokerInner();
// Target topic: the group's retry topic, or its DLQ topic if the branch above switched it.
msgInner.setTopic(newTopic);
// Message body.
msgInner.setBody(msgExt.getBody());
// Flag.
msgInner.setFlag(msgExt.getFlag());
// Copy the properties (including any delay level / original topic set above).
MessageAccessor.setProperties(msgInner, msgExt.getProperties());
msgInner.setPropertiesString(MessageDecoder.messageProperties2String(msgExt.getProperties()));
msgInner.setTagsCode(MessageExtBrokerInner.tagsString2tagsCode(null, msgExt.getTags()));
// Queue id chosen above; the remaining fields are carried over from the loaded message.
msgInner.setQueueId(queueIdInt);
msgInner.setSysFlag(msgExt.getSysFlag());
msgInner.setBornTimestamp(msgExt.getBornTimestamp());
msgInner.setBornHost(msgExt.getBornHost());
msgInner.setStoreHost(this.getStoreHost());
// One more consumption attempt has been made.
msgInner.setReconsumeTimes(msgExt.getReconsumeTimes() + 1);
// Keep the very first message id as the origin id across repeated retries.
String originMsgId = MessageAccessor.getOriginMessageId(msgExt);
MessageAccessor.setOriginMessageId(msgInner, UtilAll.isBlank(originMsgId) ? msgExt.getMsgId() : originMsgId);
// Store the message (putMessage was covered in chapter 11.1 of the article). For messages that
// carry a delay level, putMessage takes a special path; see Commitlog.putMessage.
PutMessageResult putMessageResult = this.brokerController.getMessageStore().putMessage(msgInner);
if (putMessageResult != null) {
switch (putMessageResult.getPutMessageStatus()) {
case PUT_OK: // stored successfully
String backTopic = msgExt.getTopic();
// Prefer the original topic recorded in the properties.
String correctTopic = msgExt.getProperty(MessageConst.PROPERTY_RETRY_TOPIC);
if (correctTopic != null) {
backTopic = correctTopic;
}
// Count one send-back for this consumer group and original topic.
this.brokerController.getBrokerStatsManager().incSendBackNums(requestHeader.getGroup(), backTopic);
// Report success.
response.setCode(ResponseCode.SUCCESS);
response.setRemark(null);
return response;
default:
break;
}
// Any status other than PUT_OK is reported as SYSTEM_ERROR, with the status name as the remark.
response.setCode(ResponseCode.SYSTEM_ERROR);
response.setRemark(putMessageResult.getPutMessageStatus().name());
return response;
}
// The store returned no result at all.
response.setCode(ResponseCode.SYSTEM_ERROR);
response.setRemark("putMessageResult is null");
return response;
}
Commitlog.putMessage
/**
 * Excerpt of Commitlog.putMessage showing only the delay-message handling: a non-transactional
 * or commit message whose delay level is positive is redirected to the schedule topic, and its
 * real topic and queue id are saved in the message properties.
 *
 * @param msg the message about to be stored; its topic, queue id and properties may be rewritten here
 * @return the put result (produced by code omitted from this excerpt)
 */
public PutMessageResult putMessage(final MessageExtBrokerInner msg) {
// (unrelated code omitted in this excerpt)
String topic = msg.getTopic();
int queueId = msg.getQueueId();
final int tranType = MessageSysFlag.getTransactionValue(msg.getSysFlag());
// Only non-transactional messages and transaction commits are eligible for delay handling.
if (tranType == MessageSysFlag.TRANSACTION_NOT_TYPE
|| tranType == MessageSysFlag.TRANSACTION_COMMIT_TYPE) {
// A positive delay level marks a delayed message (retry messages included).
if (msg.getDelayTimeLevel() > 0) {
// Clamp the level to the maximum level known to the schedule service.
if (msg.getDelayTimeLevel() > this.defaultMessageStore.getScheduleMessageService().getMaxDelayLevel()) {
msg.setDelayTimeLevel(this.defaultMessageStore.getScheduleMessageService().getMaxDelayLevel());
}
// Redirect to the schedule topic (fixed name SCHEDULE_TOPIC_XXXX, per the article author's note).
topic = ScheduleMessageService.SCHEDULE_TOPIC;
// Queue id derived from the delay level (level - 1, per the article author's note).
queueId = ScheduleMessageService.delayLevel2QueueId(msg.getDelayTimeLevel());
// Back up the real topic and queue id in the properties so they can be restored at delivery time.
MessageAccessor.putProperty(msg, MessageConst.PROPERTY_REAL_TOPIC, msg.getTopic());
MessageAccessor.putProperty(msg, MessageConst.PROPERTY_REAL_QUEUE_ID, String.valueOf(msg.getQueueId()));
msg.setPropertiesString(MessageDecoder.messageProperties2String(msg.getProperties()));
msg.setTopic(topic);
msg.setQueueId(queueId);
}
}
// (unrelated code omitted in this excerpt)
return putMessageResult;
}
19.2 异步任务分发
重试消息并不会立刻被消费:它由定时任务按延迟级别对应的间隔时间到期后重新分发,之后才能被消费端消费。这一节通过源码把这一点讲清楚。直接进入 ScheduleMessageService 类的 start() 方法,该方法会在 BrokerController 启动时被调用,下面开始看源码。
/**
 * Starts the delayed-message scheduler. Only the first call has any effect.
 *
 * <p>Creates a daemon {@link Timer}, schedules one {@code DeliverDelayedMessageTimerTask}
 * per configured delay level (starting from that level's recorded offset, or 0 when none
 * is recorded), and schedules a fixed-rate task that calls {@code persist()}.
 */
public void start() {
    // CAS guard: whoever flips started from false to true does the setup; everyone else returns.
    if (!started.compareAndSet(false, true)) {
        return;
    }

    // Plain java.util.Timer running on a daemon thread.
    this.timer = new Timer("ScheduleMessageTimerThread", true);

    // delayLevelTable is filled by load() (via parseDelayLevel()); the article author notes that
    // BrokerController.initialize calls ScheduleMessageService.load before this method runs.
    for (Map.Entry<Integer, Long> levelEntry : this.delayLevelTable.entrySet()) {
        final Integer level = levelEntry.getKey();          // delay level
        final Long delayMillis = levelEntry.getValue();     // delay of that level in milliseconds
        final Long recorded = this.offsetTable.get(level);  // offset already delivered for that level
        final long startOffset = (recorded == null) ? 0L : recorded;

        if (delayMillis == null) {
            continue;
        }
        // First run of this level's delivery task; see DeliverDelayedMessageTimerTask.run.
        this.timer.schedule(new DeliverDelayedMessageTimerTask(level, startOffset), FIRST_DELAY_TIME);
    }

    // Periodic persist() call, skipped while the service is not started.
    final TimerTask persistTask = new TimerTask() {
        @Override
        public void run() {
            try {
                if (started.get()) {
                    ScheduleMessageService.this.persist();
                }
            } catch (Throwable e) {
                log.error("scheduleAtFixedRate flush exception", e);
            }
        }
    };
    final long flushInterval = this.defaultMessageStore.getMessageStoreConfig().getFlushDelayOffsetInterval();
    this.timer.scheduleAtFixedRate(persistTask, 10000, flushInterval);
}
ScheduleMessageService.load
/**
 * Runs the superclass load step and, only if that succeeds, parses the delay-level
 * configuration.
 *
 * @return {@code true} when both the superclass load and {@code parseDelayLevel()} succeed
 */
public boolean load() {
    // Short-circuit: parseDelayLevel() is not called when super.load() fails.
    return super.load() && this.parseDelayLevel();
}
/**
 * Parses the configured delay-level string into {@code delayLevelTable} and tracks the
 * highest level seen in {@code maxDelayLevel}.
 *
 * <p>The string is a space-separated list of {@code <number><unit>} tokens, where the unit
 * is one of {@code s}, {@code m}, {@code h}, {@code d}. Levels are numbered from 1 in list
 * order. The default noted in the article is
 * "1s 5s 10s 30s 1m 2m 3m 4m 5m 6m 7m 8m 9m 10m 20m 30m 1h 2h" (18 levels).
 *
 * @return {@code true} if every token parsed; {@code false} if any exception occurred
 *         (levels parsed before the failure stay in the table)
 */
public boolean parseDelayLevel() {
    // Milliseconds per unit suffix.
    HashMap<String, Long> unitMillis = new HashMap<String, Long>();
    unitMillis.put("s", 1000L);                 // second
    unitMillis.put("m", 1000L * 60);            // minute
    unitMillis.put("h", 1000L * 60 * 60);       // hour
    unitMillis.put("d", 1000L * 60 * 60 * 24);  // day

    String levelString = this.defaultMessageStore.getMessageStoreConfig().getMessageDelayLevel();
    try {
        int level = 0;
        for (String token : levelString.split(" ")) {
            level++; // levels are 1-based

            // The last character is the unit, everything before it is the amount.
            final int unitPos = token.length() - 1;
            Long perUnit = unitMillis.get(token.substring(unitPos));

            // Remember the highest level seen so far.
            if (this.maxDelayLevel < level) {
                this.maxDelayLevel = level;
            }

            long amount = Long.parseLong(token.substring(0, unitPos));
            // An unknown unit leaves perUnit null, so the multiplication throws and the
            // catch below reports the failure.
            this.delayLevelTable.put(level, perUnit * amount);
        }
    } catch (Exception e) {
        log.error("parseDelayLevel exception", e);
        log.info("levelString String = {}", levelString);
        return false;
    }
    return true;
}
DeliverDelayedMessageTimerTask.run
/**
 * Timer callback: runs {@code executeOnTimeup()} while the service is started. If that
 * throws, the error is logged and a new task for the same delay level and offset is
 * scheduled after {@code DELAY_FOR_A_PERIOD}.
 */
public void run() {
    try {
        if (!isStarted()) {
            return;
        }
        // Do the actual delivery work.
        this.executeOnTimeup();
    } catch (Exception e) {
        // XXX: warn and notify me
        log.error("ScheduleMessageService, executeOnTimeup exception", e);
        // Retry from the same position later so this level's chain of tasks does not stop.
        final DeliverDelayedMessageTimerTask retryTask =
            new DeliverDelayedMessageTimerTask(this.delayLevel, this.offset);
        ScheduleMessageService.this.timer.schedule(retryTask, DELAY_FOR_A_PERIOD);
    }
}
DeliverDelayedMessageTimerTask.executeOnTimeup
/**
 * Delivers the due entries of this delay level's schedule queue, starting at {@code offset},
 * and always schedules a follow-up task before returning:
 * <ul>
 *   <li>entry not yet due: a new task at that entry's offset, after the remaining countdown;</li>
 *   <li>re-put of a due message fails: a new task at that entry's offset, after DELAY_FOR_A_PERIOD;</li>
 *   <li>all readable entries handled: a new task just past the last entry, after DELAY_FOR_A_WHILE;</li>
 *   <li>no consume queue or no readable buffer: a new task at failScheduleOffset, after DELAY_FOR_A_WHILE.</li>
 * </ul>
 */
public void executeOnTimeup() {
// Find the consume queue of SCHEDULE_TOPIC (fixed name SCHEDULE_TOPIC_XXXX, per the article
// author's note) for this delay level (the lookup may create it, per the same note). In other
// words, the broker keeps all delayed/retry messages under one topic, one queue per delay level.
ConsumeQueue cq =
ScheduleMessageService.this.defaultMessageStore.findConsumeQueue(SCHEDULE_TOPIC,
delayLevel2QueueId(delayLevel));
// Offset for the fallback reschedule at the bottom; starts as the current offset.
long failScheduleOffset = offset;
if (cq != null) {
// Get the consume-queue index entries starting at this.offset
// (a memory-mapped buffer, per the article author's note).
SelectMappedBufferResult bufferCQ = cq.getIndexBuffer(this.offset);
if (bufferCQ != null) {
try {
// Offset the next task will start from. The loop below handles every due entry in the
// buffer, not just one; this is updated as it goes.
long nextOffset = offset;
int i = 0;
// Scratch ext unit, filled by cq.getExt(...) below when tagsCode is an ext address.
ConsumeQueueExt.CqExtUnit cqExtUnit = new ConsumeQueueExt.CqExtUnit();
// Walk every readable unit. The three reads below consume 8 + 4 + 8 = 20 bytes per unit.
for (; i < bufferCQ.getSize(); i += ConsumeQueue.CQ_STORE_UNIT_SIZE) {
// commitlog offset
long offsetPy = bufferCQ.getByteBuffer().getLong();
// message size
int sizePy = bufferCQ.getByteBuffer().getInt();
// tags code. For entries in the schedule topic this field holds the delivery
// timestamp instead; see Commitlog.checkMessageAndReturnSize.
long tagsCode = bufferCQ.getByteBuffer().getLong();
// Ext-address case (not taken by default, per the article author's note).
if (cq.isExtAddr(tagsCode)) {
if (cq.getExt(tagsCode, cqExtUnit)) {
tagsCode = cqExtUnit.getTagsCode();
} else {
//can't find ext content.So re compute tags code.
log.error("[BUG] can't find consume queue extend file content!addr={}, offsetPy={}, sizePy={}",
tagsCode, offsetPy, sizePy);
long msgStoreTime = defaultMessageStore.getCommitLog().pickupStoreTimestamp(offsetPy, sizePy);
tagsCode = computeDeliverTimestamp(delayLevel, msgStoreTime);
}
}
long now = System.currentTimeMillis();
// Final delivery time, i.e. when the message should become visible to consumers.
long deliverTimestamp = this.correctDeliverTimestamp(now, tagsCode);
// Queue offset of the entry being examined right now (i / unit size entries past the start).
// The reschedules inside this loop therefore resume at this same entry.
nextOffset = offset + (i / ConsumeQueue.CQ_STORE_UNIT_SIZE);
// Time remaining until delivery.
long countdown = deliverTimestamp - now;
// <= 0: the entry is due and can be delivered now.
if (countdown <= 0) {
// Load the real message from the commitlog.
MessageExt msgExt =
ScheduleMessageService.this.defaultMessageStore.lookMessageByOffset(
offsetPy, sizePy);
// Found. (If it is not found, the entry is skipped and the loop moves on.)
if (msgExt != null) {
try {
// Restore the real topic and put the message into the commitlog once more via
// putMessage, through the normal store/dispatch path. So a retried message is
// written to the commitlog at least three times: (1) the producer's original
// send; (2) the consumer's send-back; (3) this delivery when the delay expires.
MessageExtBrokerInner msgInner = this.messageTimeup(msgExt);
PutMessageResult putMessageResult =
ScheduleMessageService.this.writeMessageStore
.putMessage(msgInner);
// Stored OK: move on to the next entry.
if (putMessageResult != null
&& putMessageResult.getPutMessageStatus() == PutMessageStatus.PUT_OK) {
continue;
} else {
// XXX: warn and notify me
log.error(
"ScheduleMessageService, a message time up, but reput it failed, topic: {} msgId {}",
msgExt.getTopic(), msgExt.getMsgId());
ScheduleMessageService.this.timer.schedule(
new DeliverDelayedMessageTimerTask(this.delayLevel,
nextOffset), DELAY_FOR_A_PERIOD);
ScheduleMessageService.this.updateOffset(this.delayLevel,
nextOffset);
return;
}
} catch (Exception e) {
/*
* XXX: warn and notify me
*/
log.error(
"ScheduleMessageService, messageTimeup execute error, drop it. msgExt="
+ msgExt + ", nextOffset=" + nextOffset + ",offsetPy="
+ offsetPy + ",sizePy=" + sizePy, e);
}
}
} else {// countdown > 0: this entry is not due yet, so keep waiting
// Run again exactly when it becomes due, i.e. after countdown milliseconds.
ScheduleMessageService.this.timer.schedule(
new DeliverDelayedMessageTimerTask(this.delayLevel, nextOffset),
countdown);
ScheduleMessageService.this.updateOffset(this.delayLevel, nextOffset);
return;
}
} // end of for
// Every readable entry was handled: continue just past the last one.
nextOffset = offset + (i / ConsumeQueue.CQ_STORE_UNIT_SIZE);
ScheduleMessageService.this.timer.schedule(new DeliverDelayedMessageTimerTask(
this.delayLevel, nextOffset), DELAY_FOR_A_WHILE);
ScheduleMessageService.this.updateOffset(this.delayLevel, nextOffset);
return;
} finally {
bufferCQ.release();
}
} // end of if (bufferCQ != null)
else {
// No buffer at this offset: if the offset is below the queue's minimum, jump to the minimum.
long cqMinOffset = cq.getMinOffsetInQueue();
if (offset < cqMinOffset) {
failScheduleOffset = cqMinOffset;
log.error("schedule CQ offset invalid. offset=" + offset + ", cqMinOffset="
+ cqMinOffset + ", queueId=" + cq.getQueueId());
}
}
} // end of if (cq != null)
// Important: schedule the next run. Without this the task would run once and never again.
ScheduleMessageService.this.timer.schedule(new DeliverDelayedMessageTimerTask(this.delayLevel,
failScheduleOffset), DELAY_FOR_A_WHILE);
}
Commitlog.checkMessageAndReturnSize
/**
 * Excerpt of Commitlog.checkMessageAndReturnSize showing only the delay-message handling:
 * for a message in the schedule topic that carries a positive delay level, {@code tagsCode}
 * is replaced by the computed delivery timestamp.
 *
 * <p>{@code propertiesMap}, {@code topic}, {@code storeTimestamp} and {@code tagsCode} come
 * from code omitted from this excerpt, as does the return statement.
 */
public DispatchRequest checkMessageAndReturnSize(java.nio.ByteBuffer byteBuffer, final boolean checkCRC,
final boolean readBody) {
// (unrelated code omitted in this excerpt)
// Timing message processing
{
String t = propertiesMap.get(MessageConst.PROPERTY_DELAY_TIME_LEVEL);
// Applies only to messages stored under the schedule topic that carry a delay-level property.
if (ScheduleMessageService.SCHEDULE_TOPIC.equals(topic) && t != null) {
int delayLevel = Integer.parseInt(t);
// Clamp the level to the maximum level known to the schedule service.
if (delayLevel > this.defaultMessageStore.getScheduleMessageService().getMaxDelayLevel()) {
delayLevel = this.defaultMessageStore.getScheduleMessageService().getMaxDelayLevel();
}
if (delayLevel > 0) { // positive level: a delayed message (retry messages included)
// From here on tagsCode holds the delivery timestamp rather than a tag hash.
tagsCode = this.defaultMessageStore.getScheduleMessageService().computeDeliverTimestamp(delayLevel,
storeTimestamp);
}
}
}
// (unrelated code omitted in this excerpt)
}
至此,RocketMQ 的源码解析已完成。至于死信队列(DLQ),逻辑比较简单,不再单独讲解:它的存储流程与重试消息一致,区别在于 DLQ 的 topic 在创建时只有写权限(PermName.PERM_WRITE),并且不设置延迟级别,因此默认情况下其中的消息无法被消费。如果确实需要消费,有两种途径:
一、消费端编写代码订阅该消费组对应的 DLQ topic,并在 dashboard 中把该 topic 配置的 perm 值改为 4(只读)或 6(读写),否则无法消费;
二、在dashboard中直接点击消息,并重发