RocketMQ版本4.6.0,记录自己看源码的过程
顺序消费稍微复杂点,主要是顺序消费对重平衡以及消息拉取都有影响。
重平衡
先看下重平衡,主要不同在于:根据负载均衡策略计算出每个消费者分配到的消费队列后,会进入判断该消费者订阅的队列是否发生变化的方法(updateProcessQueueTableInRebalance);在该方法中,为新分配到的消费队列创建拉取任务之前,需要先向broker获取该队列的锁。
RebalanceImpl
/**
 * Reconciles the locally cached process-queue table for {@code topic} with the queue set that the
 * latest rebalance assigned to this consumer.
 *
 * <p>Queues of the topic that are no longer assigned (or, for passive consumption, whose pull has
 * expired) are marked dropped and removed; queues that are newly assigned get a fresh
 * {@code ProcessQueue} and a {@code PullRequest}. For orderly consumption a new queue is only
 * added after {@code lock(mq)} succeeds.
 *
 * @param topic   topic being rebalanced
 * @param mqSet   queues assigned to this consumer for the topic
 * @param isOrder whether the consumer consumes orderly (a lock is required before adding a queue)
 * @return {@code true} if at least one queue was removed from or added to the table
 */
private boolean updateProcessQueueTableInRebalance(final String topic, final Set<MessageQueue> mqSet,
    final boolean isOrder) {
    boolean changed = false;

    // Pass 1: walk the queues this consumer currently holds and release the ones it must give up.
    for (Iterator<Entry<MessageQueue, ProcessQueue>> cursor = this.processQueueTable.entrySet().iterator();
         cursor.hasNext(); ) {
        Entry<MessageQueue, ProcessQueue> held = cursor.next();
        MessageQueue heldQueue = held.getKey();
        ProcessQueue heldProcessQueue = held.getValue();

        // Queues that belong to another topic are not touched by this rebalance.
        if (!heldQueue.getTopic().equals(topic)) {
            continue;
        }

        if (!mqSet.contains(heldQueue)) {
            // The queue is no longer assigned to this consumer: stop consuming it and, if the
            // subclass agrees to release it, remove it from the local table.
            heldProcessQueue.setDropped(true);
            if (this.removeUnnecessaryMessageQueue(heldQueue, heldProcessQueue)) {
                cursor.remove();
                changed = true;
                log.info("doRebalance, {}, remove unnecessary mq, {}", consumerGroup, heldQueue);
            }
            continue;
        }

        if (!heldProcessQueue.isPullExpired()) {
            continue;
        }

        // Still assigned but the pull has expired: only passive consumption drops the queue here.
        switch (this.consumeType()) {
            case CONSUME_PASSIVELY:
                heldProcessQueue.setDropped(true);
                if (this.removeUnnecessaryMessageQueue(heldQueue, heldProcessQueue)) {
                    cursor.remove();
                    changed = true;
                    log.error("[BUG]doRebalance, {}, remove unnecessary mq, {}, because pull is pause, so try to fixed it",
                        consumerGroup, heldQueue);
                }
                break;
            case CONSUME_ACTIVELY:
            default:
                break;
        }
    }

    // Pass 2: create a pull request for every assigned queue that is not in the table yet.
    List<PullRequest> newPullRequests = new ArrayList<PullRequest>();
    for (MessageQueue assigned : mqSet) {
        if (this.processQueueTable.containsKey(assigned)) {
            continue;
        }

        // Orderly consumption must own the queue lock first; on failure the queue is skipped
        // and is retried on a later rebalance.
        if (isOrder && !this.lock(assigned)) {
            log.warn("doRebalance, {}, add a new mq failed, {}, because lock failed", consumerGroup, assigned);
            continue;
        }

        // Discard any stale offset kept for this queue before computing where to start.
        this.removeDirtyOffset(assigned);
        ProcessQueue freshProcessQueue = new ProcessQueue();
        long startOffset = this.computePullFromWhere(assigned);
        if (startOffset < 0) {
            log.warn("doRebalance, {}, add new mq failed, {}", consumerGroup, assigned);
            continue;
        }

        ProcessQueue previous = this.processQueueTable.putIfAbsent(assigned, freshProcessQueue);
        if (previous != null) {
            log.info("doRebalance, {}, mq already exists, {}", consumerGroup, assigned);
            continue;
        }

        log.info("doRebalance, {}, add a new mq, {}", consumerGroup, assigned);
        PullRequest request = new PullRequest();
        request.setConsumerGroup(consumerGroup);
        request.setNextOffset(startOffset);
        request.setMessageQueue(assigned);
        request.setProcessQueue(freshProcessQueue);
        newPullRequests.add(request);
        changed = true;
    }

    // Hand the newly created pull requests over for dispatch.
    this.dispatchPullRequest(newPullRequests);
    return changed;
}
移除已经不属于的队列
1、如果重新分配后的队列不包含旧的队列mq,说明经过重平衡后,该队列被分配给别的消费者了,所以需要暂停当前消费者对该队列的消费,将该ProcessQueue设置dropped=true,并从本地缓存中移除,并且会同时释放broker中的锁。
RebalancePushImpl
/**
 * Releases a queue that is no longer needed by this consumer.
 *
 * <p>The queue's offset is persisted and then removed from the offset store. For orderly
 * consumption in clustering mode the queue lock is additionally released via
 * {@code unlockDelay}, but only after the process queue's consume lock could be taken within
 * one second, so that the release does not overlap with a listener that is still consuming.
 *
 * @param mq queue to release
 * @param pq process queue paired with {@code mq}
 * @return {@code true} when the queue may be removed from the local table; {@code false} when
 *         the consume lock could not be taken or an exception occurred (the caller keeps the
 *         queue in that case)
 */
@Override
public boolean removeUnnecessaryMessageQueue(MessageQueue mq, ProcessQueue pq) {
    // Flush the consumed offset of this queue, then forget it locally.
    this.defaultMQPushConsumerImpl.getOffsetStore().persist(mq);
    this.defaultMQPushConsumerImpl.getOffsetStore().removeOffset(mq);

    boolean orderlyInClusterMode = this.defaultMQPushConsumerImpl.isConsumeOrderly()
        && MessageModel.CLUSTERING.equals(this.defaultMQPushConsumerImpl.messageModel());
    if (!orderlyInClusterMode) {
        // Nothing to unlock: the queue can be removed right away.
        return true;
    }

    try {
        // Failing to take the consume lock means a listener is still working on this queue.
        // Releasing the queue lock now could let another consumer start on the same queue
        // concurrently, so the removal is refused and left for a later rebalance.
        if (!pq.getLockConsume().tryLock(1000, TimeUnit.MILLISECONDS)) {
            log.warn("[WRONG]mq is consuming, so can not unlock it, {}. maybe hanged for a while, {}",
                mq,
                pq.getTryUnlockTimes());
            pq.incTryUnlockTimes();
            return false;
        }
        try {
            // Ask for the queue lock to be released; the result decides whether to remove.
            return this.unlockDelay(mq, pq);
        } finally {
            pq.getLockConsume().unlock();
        }
    } catch (Exception e) {
        log.error("removeUnnecessaryMessageQueue Exception", e);
    }
    return false;
}
这里就是第一处与并发消费不同的地方,在移除之前得先释放broker端占用的锁(释放逻辑在下文说明),释放之前还得先获得processQueue的消费锁,避免和消息队列消费冲突。如果获取消费锁失败,说明该processQueue中的消息正在执行自定义消费逻辑(因为消费之前也得先获取消费锁),则移除消息队列失败,等待下次重新分配消费队列时,再进行移除。如果未获得消费锁而进行移除,则可能出现另外的 Consumer 和当前 Consumer 同时消费该消息队列,导致该消息队列的消息无法顺序消费。
分配到新的队列
2、如果旧的缓存订阅的队列中没有新的队列,说明经过重平衡后,该消费者分配到了新的队列,则需要创建PullRequest拉取消息任务去拉取消息。这里也是跟并发消费不同的地方,如果是顺序消费,则需要去broker中锁住该队列,成功锁住,则创建一个拉取任务去拉取消息,否则跳过,等待下一次重平衡再重新去获取锁。
/**
 * Asks the master broker of {@code mq} to lock that queue for this client.
 *
 * <p>For every queue the broker reports as locked, the matching local process queue (if one
 * exists) is flagged as locked and its lock timestamp refreshed. A queue that has only just
 * been assigned has no process queue in the table yet when this method is called from the
 * rebalance, so nothing is flagged for it here; {@code lockAll()} sets the flag later.
 *
 * @param mq queue to lock
 * @return {@code true} if the broker's reply contains {@code mq}; {@code false} if the master
 *         broker address is unknown, the lock was not granted, or the request failed
 */
public boolean lock(final MessageQueue mq) {
    // Locks are requested from the master broker only.
    FindBrokerResult masterBroker = this.mQClientFactory.findBrokerAddressInSubscribe(mq.getBrokerName(), MixAll.MASTER_ID, true);
    if (masterBroker == null) {
        return false;
    }

    // Request body: who is asking (group + client id) and which queue should be locked.
    LockBatchRequestBody lockRequest = new LockBatchRequestBody();
    lockRequest.setConsumerGroup(this.consumerGroup);
    lockRequest.setClientId(this.mQClientFactory.getClientId());
    lockRequest.getMqSet().add(mq);

    try {
        // Synchronous call with a 1000 ms timeout; the reply lists the queues that were locked.
        Set<MessageQueue> grantedQueues =
            this.mQClientFactory.getMQClientAPIImpl().lockBatchMQ(masterBroker.getBrokerAddr(), lockRequest, 1000);

        for (MessageQueue granted : grantedQueues) {
            ProcessQueue localProcessQueue = this.processQueueTable.get(granted);
            if (localProcessQueue == null) {
                continue;
            }
            localProcessQueue.setLocked(true);
            localProcessQueue.setLastLockTimestamp(System.currentTimeMillis());
        }

        boolean lockOK = grantedQueues.contains(mq);
        log.info("the message queue lock {}, {} {}",
            lockOK ? "OK" : "Failed",
            this.consumerGroup,
            mq);
        return lockOK;
    } catch (Exception e) {
        log.error("lockBatchMQ exception, " + mq, e);
        return false;
    }
}
在Broker端,锁定队列的请求由AdminBrokerProcessor处理器处理
/**
 * Handles a batch lock request: tries to lock the requested queues for the requesting client
 * and replies with the subset that was locked.
 *
 * @param ctx     channel context of the request (not used by this handler)
 * @param request command whose body is an encoded {@code LockBatchRequestBody}
 * @return a {@code SUCCESS} response whose body carries the locked queues
 * @throws RemotingCommandException declared by the handler signature
 */
private RemotingCommand lockBatchMQ(ChannelHandlerContext ctx,
    RemotingCommand request) throws RemotingCommandException {
    final RemotingCommand response = RemotingCommand.createResponseCommand(null);
    LockBatchRequestBody lockRequest = LockBatchRequestBody.decode(request.getBody(), LockBatchRequestBody.class);

    // Delegate to the rebalance lock manager; queues missing from the result were not locked
    // for this client.
    Set<MessageQueue> grantedQueues = this.brokerController.getRebalanceLockManager().tryLockBatch(
        lockRequest.getConsumerGroup(),
        lockRequest.getMqSet(),
        lockRequest.getClientId());

    // Send the locked queues back to the client.
    LockBatchResponseBody reply = new LockBatchResponseBody();
    reply.setLockOKMQSet(grantedQueues);

    response.setBody(reply.encode());
    response.setCode(ResponseCode.SUCCESS);
    response.setRemark(null);
    return response;
}
通过重平衡锁管理器RebalanceLockManager去锁消息队列
public class RebalanceLockManager {
/**
* 锁过期时间,默认配置30s,没有配置则默认60s。Consumer需要不断刷新该锁过期时间,默认配置 20s 刷新一次
*/
private final static long REBALANCE_LOCK_MAX_LIVE_TIME = Long.parseLong(System.getProperty(
"rocketmq.broker.rebalance.lockMaxLiveTime", "60000"));
private final Lock lock = new ReentrantLock();
/**
* 保存每个消费组消费队列锁定情况,以消费组名为key,而不以topic为key,是因为每个topic都可能会被多个消费组订阅,
* 各个消费组互不影响,每个消费组可以同时锁住同一个消费队列,所以以消费组为单位保存
*/
private final ConcurrentMap<String/* group */, ConcurrentHashMap<MessageQueue, LockEntry>> mqLockTable =
new ConcurrentHashMap<String, ConcurrentHashMap<MessageQueue, LockEntry>>(1024);
/**
* 尝试去锁消息队列
*
* @param mqs 表示消费者尝试去申请锁的队列
*/
public Set<MessageQueue> tryLockBatch(final String group, final Set<MessageQueue> mqs,
final String clientId) {
// 目前已被clientId对应的消费者锁住的队列和未锁住的队列
Set<MessageQueue> lockedMqs = new HashSet<MessageQueue>(mqs.size());
Set<MessageQueue> notLockedMqs = new HashSet<MessageQueue>(mqs.size());
for (MessageQueue mq : mqs) {
// 判断该消费队列是否已被clientId对应的消费者锁住
if (this.isLocked(group, mq, clientId)) {
lockedMqs.add(mq);
} else {
notLockedMqs.add(mq);
}
}
if (!notLockedMqs.isEmpty()) {
try {
this.lock.lockInterruptibly();
try {
// 该消费组下消息队列锁定情况,表示消息队列被该消费组中的哪个消费者锁定
ConcurrentHashMap<MessageQueue, LockEntry> groupValue = this.mqLockTable.get(group);
if (null == groupValue) {
groupValue = new ConcurrentHashMap<>(32);
this.mqLockTable.put(group, groupValue);
}
// 对于没被我(clientId)锁定的队列,开始尝试去锁定
for (MessageQueue mq : notLockedMqs) {
LockEntry lockEntry = groupValue.get(mq);
// 为空就是该队列还没被锁定,可以直接被我(clientId)锁定
if (null == lockEntry) {
lockEntry = new LockEntry();
lockEntry.setClientId(clientId);
groupValue.put(mq, lockEntry);
log.info(
"tryLockBatch, message queue not locked, I got it. Group: {} NewClientId: {} {}",
group,
clientId,
mq);
}
// 如果被我锁住了,就更新锁住时间,添加到锁定队列中
if (lockEntry.isLocked(clientId)) {
lockEntry.setLastUpdateTimestamp(System.currentTimeMillis());
lockedMqs.add(mq);
continue;
}
// 到这说明队列被别的消费者锁住了
String oldClientId = lockEntry.getClientId();
// 如果过期了就直接换我锁住
if (lockEntry.isExpired()) {
lockEntry.setClientId(clientId);
lockEntry.setLastUpdateTimestamp(System.currentTimeMillis());
log.warn(
"tryLockBatch, message queue lock expired, I got it. Group: {} OldClientId: {} NewClientId: {} {}",
group,
oldClientId,
clientId,
mq);
lockedMqs.add(mq);
continue;
}
log.warn(
"tryLockBatch, message queue locked by other client. Group: {} OtherClientId: {} NewClientId: {} {}",
group,
oldClientId,
clientId,
mq);
}
} finally {
this.lock.unlock();
}
} catch (InterruptedException e) {
log.error("putMessage exception", e);
}
}
// 返回锁住的队列
return lockedMqs;
}
}
Step1:首先将要申请锁的队列按是否已经被clientId对应的消费者锁定分为两个集合。
判断是否被该clientId锁定:
/**
 * Tells whether {@code mq} is currently locked by {@code clientId} within {@code group}, and
 * refreshes the entry's update timestamp when it is.
 *
 * @param group    consumer group to look in
 * @param mq       queue to check
 * @param clientId client that is expected to hold the lock
 * @return the result of {@code LockEntry.isLocked(clientId)} for the queue's entry, or
 *         {@code false} when the group or the queue has no entry
 */
private boolean isLocked(final String group, final MessageQueue mq, final String clientId) {
    ConcurrentHashMap<MessageQueue, LockEntry> groupLocks = this.mqLockTable.get(group);
    if (groupLocks == null) {
        return false;
    }

    LockEntry entry = groupLocks.get(mq);
    if (entry == null) {
        return false;
    }

    boolean heldByClient = entry.isLocked(clientId);
    if (heldByClient) {
        // Touching the timestamp restarts the entry's expiry countdown.
        entry.setLastUpdateTimestamp(System.currentTimeMillis());
    }
    return heldByClient;
}
Step2:获取该消费组下消息队列的锁定情况。
Step3:迭代对没被该消费者(clientId)锁定的队列,开始尝试去锁定。
Step4:最终返回成功被锁定的队列。
这里顺便说下释放锁的逻辑:
/**
 * Releases the locks that {@code clientId} holds on the given queues within {@code group}.
 *
 * <p>Only entries whose client id equals {@code clientId} are removed; queues locked by another
 * client or not locked at all are logged and left untouched. If the calling thread is
 * interrupted while waiting for the internal lock, nothing is released and the interrupt flag
 * is restored.
 *
 * @param group    consumer group the client belongs to
 * @param mqs      queues to unlock
 * @param clientId id of the client giving up its locks
 */
public void unlockBatch(final String group, final Set<MessageQueue> mqs, final String clientId) {
    try {
        this.lock.lockInterruptibly();
        try {
            ConcurrentHashMap<MessageQueue, LockEntry> groupValue = this.mqLockTable.get(group);
            if (null == groupValue) {
                log.warn("unlockBatch, group not exist, Group: {} {}",
                    group,
                    clientId);
                return;
            }

            for (MessageQueue mq : mqs) {
                LockEntry lockEntry = groupValue.get(mq);
                if (null == lockEntry) {
                    log.warn("unlockBatch, but mq not locked, Group: {} {} {}",
                        group,
                        mq,
                        clientId);
                } else if (lockEntry.getClientId().equals(clientId)) {
                    // The caller owns this lock: drop the entry.
                    groupValue.remove(mq);
                    log.info("unlockBatch, Group: {} {} {}",
                        group,
                        mq,
                        clientId);
                } else {
                    log.warn("unlockBatch, but mq locked by other client: {}, Group: {} {} {}",
                        lockEntry.getClientId(),
                        group,
                        mq,
                        clientId);
                }
            }
        } finally {
            this.lock.unlock();
        }
    } catch (InterruptedException e) {
        // Restore the interrupt status so callers further up can still observe it.
        Thread.currentThread().interrupt();
        log.error("unlockBatch exception", e);
    }
}
释放锁就比较简单了,循环要释放锁的队列,判断是否被该消费者锁定,如果是,则移除锁定记录。
接着回到上面的lock方法中,从broker锁定队列后,会去设置对应的处理队列processQueue为锁定状态,这样才可以去拉取消息。
// Excerpt from the body of lock(): ask the broker to lock the queues (1000 ms timeout); the reply
// is the set of queues that were locked.
Set<MessageQueue> lockedMq =
this.mQClientFactory.getMQClientAPIImpl().lockBatchMQ(findBrokerResult.getBrokerAddr(), requestBody, 1000);
for (MessageQueue mmqq : lockedMq) {
ProcessQueue processQueue = this.processQueueTable.get(mmqq);
// If the locked queue already has a local process queue, flag it as locked and refresh its
// lock timestamp.
// A newly assigned queue has no process queue in the table at this point, so locked=true is
// not set here and that queue cannot be pulled yet. The flag is set later by lockAll(), which
// ConsumeMessageOrderlyService schedules periodically (every 20s according to the article —
// the interval constant's value is not shown here); only then can the first pull proceed.
if (processQueue != null) {
processQueue.setLocked(true);
processQueue.setLastLockTimestamp(System.currentTimeMillis());
}
}
但如果是新分配的消息队列,此时processQueueTable中还没有对应的processQueue,所以这里不会把processQueue设置为锁定状态。由于processQueue的locked属性默认是false,为新分配的消息队列创建的processQueue一开始处于未锁定状态,无法拉取消息。不过在启动顺序消费服务时会启动一个后台定时任务,定时为消费者分配到的消息队列设置锁定状态并更新锁定时间。
ConsumeMessageOrderlyService
/**
 * Starts the orderly consume service. In clustering mode this schedules a recurring task that
 * calls {@code lockMQPeriodically()}: first run after one second, then at a fixed rate of
 * {@code ProcessQueue.REBALANCE_LOCK_INTERVAL} milliseconds. Other message models schedule nothing.
 */
public void start() {
    boolean clusteringMode =
        MessageModel.CLUSTERING.equals(ConsumeMessageOrderlyService.this.defaultMQPushConsumerImpl.messageModel());
    if (!clusteringMode) {
        return;
    }

    // Periodically (re-)lock the queues assigned to this consumer.
    Runnable relockTask = new Runnable() {
        @Override
        public void run() {
            ConsumeMessageOrderlyService.this.lockMQPeriodically();
        }
    };
    this.scheduledExecutorService.scheduleAtFixedRate(
        relockTask, 1000 * 1, ProcessQueue.REBALANCE_LOCK_INTERVAL, TimeUnit.MILLISECONDS);
}
/**
 * Locks all queues assigned to this consumer by delegating to the rebalance component's
 * {@code lockAll()}; does nothing once the service has been stopped.
 */
public synchronized void lockMQPeriodically() {
    if (this.stopped) {
        return;
    }
    this.defaultMQPushConsumerImpl.getRebalanceImpl().lockAll();
}
使用重平衡组件RebalanceImpl去处理
/**
 * Requests a lock for every queue currently assigned to this consumer and mirrors the outcome
 * in the local process queues.
 *
 * <p>Queues are grouped by broker name and one batch request is sent to each broker's master.
 * Process queues of granted queues are flagged locked with a refreshed timestamp; process
 * queues of requested-but-not-granted queues are flagged unlocked. A failing request is logged
 * and does not stop the remaining brokers from being processed.
 */
public void lockAll() {
    // broker name -> queues of this consumer that live on that broker
    HashMap<String, Set<MessageQueue>> queuesByBroker = this.buildProcessQueueTableByBrokerName();

    for (Entry<String, Set<MessageQueue>> perBroker : queuesByBroker.entrySet()) {
        final String brokerName = perBroker.getKey();
        final Set<MessageQueue> mqs = perBroker.getValue();
        if (mqs.isEmpty()) {
            continue;
        }

        // Lock requests go to the master of the broker; skip the broker if its address is unknown.
        FindBrokerResult masterBroker = this.mQClientFactory.findBrokerAddressInSubscribe(brokerName, MixAll.MASTER_ID, true);
        if (masterBroker == null) {
            continue;
        }

        LockBatchRequestBody lockRequest = new LockBatchRequestBody();
        lockRequest.setConsumerGroup(this.consumerGroup);
        lockRequest.setClientId(this.mQClientFactory.getClientId());
        lockRequest.setMqSet(mqs);

        try {
            // The reply lists the queues that were locked for this client.
            Set<MessageQueue> grantedQueues =
                this.mQClientFactory.getMQClientAPIImpl().lockBatchMQ(masterBroker.getBrokerAddr(), lockRequest, 1000);

            // Granted: mark the local process queue as locked and refresh its lock time.
            for (MessageQueue granted : grantedQueues) {
                ProcessQueue grantedProcessQueue = this.processQueueTable.get(granted);
                if (grantedProcessQueue == null) {
                    continue;
                }
                if (!grantedProcessQueue.isLocked()) {
                    log.info("the message queue locked OK, Group: {} {}", this.consumerGroup, granted);
                }
                grantedProcessQueue.setLocked(true);
                grantedProcessQueue.setLastLockTimestamp(System.currentTimeMillis());
            }

            // Requested but not granted: mark the local process queue as unlocked.
            for (MessageQueue requested : mqs) {
                if (grantedQueues.contains(requested)) {
                    continue;
                }
                ProcessQueue deniedProcessQueue = this.processQueueTable.get(requested);
                if (deniedProcessQueue != null) {
                    deniedProcessQueue.setLocked(false);
                    log.warn("the message queue locked Failed, Group: {} {}", this.consumerGroup, requested);
                }
            }
        } catch (Exception e) {
            log.error("lockBatchMQ exception, " + mqs, e);
        }
    }
}
拉取消息
在拉取消息时,会先判断processQueue是否是锁定状态,如果没有锁定状态,则会延迟3s后再将pullRequest重新放回拉取任务中,必须是锁定状态下才可以拉取消息。
DefaultMQPushConsumerImpl
/**
 * Pulls messages for the given pull request.
 *
 * <p>This is an abridged excerpt: the places marked "omitted" stand for code of the original
 * method that is not reproduced here. The visible part shows the pre-checks: a dropped process
 * queue ends the request; concurrent consumption applies span-based flow control; orderly
 * consumption only proceeds while the process queue is flagged as locked and otherwise
 * re-schedules the pull request for later.
 *
 * @param pullRequest the pull task, carrying the message queue, its process queue and the next offset
 */
public void pullMessage(final PullRequest pullRequest) {
final ProcessQueue processQueue = pullRequest.getProcessQueue();
// A dropped process queue is no longer served: log and abandon this pull request.
if (processQueue.isDropped()) {
log.info("the pull request[{}] is dropped.", pullRequest.toString());
return;
}
// ... omitted in this excerpt ...
// Concurrent (non-orderly) consumption.
if (!this.consumeOrderly) {
// Flow control: if the span reported by the process queue exceeds the configured maximum,
// re-schedule the pull and log a warning once every 1000 occurrences.
if (processQueue.getMaxSpan() > this.defaultMQPushConsumer.getConsumeConcurrentlyMaxSpan()) {
this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_FLOW_CONTROL);
if ((queueMaxSpanFlowControlTimes++ % 1000) == 0) {
log.warn(
"the queue's messages, span too long, so do flow control, minOffset={}, maxOffset={}, maxSpan={}, pullRequest={}, flowControlTimes={}",
processQueue.getMsgTreeMap().firstKey(), processQueue.getMsgTreeMap().lastKey(), processQueue.getMaxSpan(),
pullRequest, queueMaxSpanFlowControlTimes);
}
return;
}
} else {
// Orderly consumption.
if (processQueue.isLocked()) {
// First pull for this request since the queue was locked: recompute the offset to pull from
// and overwrite the request's next offset with it.
if (!pullRequest.isLockedFirst()) {
final long offset = this.rebalanceImpl.computePullFromWhere(pullRequest.getMessageQueue());
// True when the recomputed offset is smaller than the offset already stored in the request.
boolean brokerBusy = offset < pullRequest.getNextOffset();
log.info("the first time to pull message, so fix offset from broker. pullRequest: {} NewOffset: {} brokerBusy: {}",
pullRequest, offset, brokerBusy);
if (brokerBusy) {
log.info("[NOTIFYME]the first time to pull message, but pull request offset larger than broker consume offset. pullRequest: {} NewOffset: {}",
pullRequest, offset);
}
pullRequest.setLockedFirst(true);
pullRequest.setNextOffset(offset);
}
} else {
// Not locked: messages may only be pulled while the process queue is locked, so put the
// request back for a later attempt (delay = pullTimeDelayMillsWhenException; 3s according
// to the article — the value is not shown here).
this.executePullRequestLater(pullRequest, pullTimeDelayMillsWhenException);
log.info("pull message later because not locked in broker, {}", pullRequest);
return;
}
}
// ... sending the actual pull request is omitted in this excerpt ...
}
消费消息
消息拉取到后,会交给消费线程消费
/**
 * Runnable that consumes the messages of one message queue in order.
 *
 * <p>Several requests for the same queue may be submitted, but {@link #run()} serializes them
 * on a per-queue lock object, so at most one thread consumes a given queue at a time while
 * different queues can be consumed in parallel.
 *
 * <p>This is an abridged excerpt: the conversion of the listener's status (marked "omitted"
 * below) is not reproduced here.
 */
class ConsumeRequest implements Runnable {
// Local cache of the messages pulled for messageQueue.
private final ProcessQueue processQueue;
// The queue this request consumes.
private final MessageQueue messageQueue;
public ConsumeRequest(ProcessQueue processQueue, MessageQueue messageQueue) {
this.processQueue = processQueue;
this.messageQueue = messageQueue;
}
public ProcessQueue getProcessQueue() {
return processQueue;
}
public MessageQueue getMessageQueue() {
return messageQueue;
}
/**
 * Consumes messages from the process queue in batches until there is nothing left, the queue
 * is dropped, the lock is missing or expired, the time budget is used up, or the result
 * handling asks to stop.
 */
@Override
public void run() {
if (this.processQueue.isDropped()) {
log.warn("run, the message queue not be able to consume, because it's dropped. {}", this.messageQueue);
return;
}
// Fetch the lock object used to serialize consumption of this message queue.
final Object objLock = messageQueueLock.fetchLockObject(this.messageQueue);
// A consume request is submitted per pull, so several threads may block here; only the thread
// holding the monitor proceeds. Whichever thread proceeds takes messages from the same
// process queue in order, so a queue is consumed by one thread at a time and messages do not
// get reordered. Different message queues can still be consumed in parallel.
synchronized (objLock) {
// Proceed in broadcasting mode, or when the process queue is locked and the lock has not expired.
if (MessageModel.BROADCASTING.equals(ConsumeMessageOrderlyService.this.defaultMQPushConsumerImpl.messageModel())
|| (this.processQueue.isLocked() && !this.processQueue.isLockExpired())) {
final long beginTime = System.currentTimeMillis();
for (boolean continueConsume = true; continueConsume; ) {
if (this.processQueue.isDropped()) {
log.warn("the message queue not be able to consume, because it's dropped. {}", this.messageQueue);
break;
}
// Clustering mode and the queue is not locked (any more): retry the lock and re-submit the
// consume request later (delay argument 10), then leave the loop.
if (MessageModel.CLUSTERING.equals(ConsumeMessageOrderlyService.this.defaultMQPushConsumerImpl.messageModel())
&& !this.processQueue.isLocked()) {
log.warn("the message queue not locked, so consume later, {}", this.messageQueue);
ConsumeMessageOrderlyService.this.tryLockLaterAndReconsume(this.messageQueue, this.processQueue, 10);
break;
}
// Clustering mode and the lock has expired: same handling, so that the lock gets renewed first.
if (MessageModel.CLUSTERING.equals(ConsumeMessageOrderlyService.this.defaultMQPushConsumerImpl.messageModel())
&& this.processQueue.isLockExpired()) {
log.warn("the message queue lock expired, so consume later, {}", this.messageQueue);
ConsumeMessageOrderlyService.this.tryLockLaterAndReconsume(this.messageQueue, this.processQueue, 10);
break;
}
// This run has been consuming longer than MAX_TIME_CONSUME_CONTINUOUSLY (60s according to the
// article — the value is not shown here): re-submit the request and end this run.
long interval = System.currentTimeMillis() - beginTime;
if (interval > MAX_TIME_CONSUME_CONTINUOUSLY) {
ConsumeMessageOrderlyService.this.submitConsumeRequestLater(processQueue, messageQueue, 10);
break;
}
// Configured batch size (1 by default according to the article — confirm against the consumer's defaults).
final int consumeBatchSize =
ConsumeMessageOrderlyService.this.defaultMQPushConsumer.getConsumeMessageBatchMaxSize();
// Unlike concurrent consumption, where the messages are fixed when the request is created,
// orderly consumption takes the next batch from the process queue here, inside the request.
// NOTE(review): per the article, takeMessags moves the messages from msgTreeMap into
// consumingMsgOrderlyTreeMap — that method is not shown here.
List<MessageExt> msgs = this.processQueue.takeMessags(consumeBatchSize);
defaultMQPushConsumerImpl.resetRetryAndNamespace(msgs, defaultMQPushConsumer.getConsumerGroup());
if (!msgs.isEmpty()) {
final ConsumeOrderlyContext context = new ConsumeOrderlyContext(this.messageQueue);
ConsumeOrderlyStatus status = null;
// Build the hook context and run the "before" hooks only when hooks are registered.
ConsumeMessageContext consumeMessageContext = null;
if (ConsumeMessageOrderlyService.this.defaultMQPushConsumerImpl.hasHook()) {
consumeMessageContext = new ConsumeMessageContext();
consumeMessageContext
.setConsumerGroup(ConsumeMessageOrderlyService.this.defaultMQPushConsumer.getConsumerGroup());
consumeMessageContext.setNamespace(defaultMQPushConsumer.getNamespace());
consumeMessageContext.setMq(messageQueue);
consumeMessageContext.setMsgList(msgs);
consumeMessageContext.setSuccess(false);
// init the consume context type
consumeMessageContext.setProps(new HashMap<String, String>());
ConsumeMessageOrderlyService.this.defaultMQPushConsumerImpl.executeHookBefore(consumeMessageContext);
}
// beginTimestamp, returnType and hasException are not read in this excerpt; presumably the
// omitted status conversion uses them.
long beginTimestamp = System.currentTimeMillis();
ConsumeReturnType returnType = ConsumeReturnType.SUCCESS;
boolean hasException = false;
try {
// Take the consume lock before invoking the listener.
// NOTE(review): lock() is called inside the try block, so if it ever threw, the finally
// block would call unlock() without holding the lock.
this.processQueue.getLockConsume().lock();
if (this.processQueue.isDropped()) {
log.warn("consumeMessage, the message queue not be able to consume, because it's dropped. {}",
this.messageQueue);
break;
}
// Run the user-supplied listener on a read-only view of the batch.
status = messageListener.consumeMessage(Collections.unmodifiableList(msgs), context);
} catch (Throwable e) {
log.warn("consumeMessage exception: {} Group: {} Msgs: {} MQ: {}",
RemotingHelper.exceptionSimpleDesc(e),
ConsumeMessageOrderlyService.this.consumerGroup,
msgs,
messageQueue);
hasException = true;
} finally {
// Release the consume lock.
this.processQueue.getLockConsume().unlock();
}
// ... status conversion omitted in this excerpt ...
// Handle the consume result; its return value decides whether the loop continues.
continueConsume = ConsumeMessageOrderlyService.this.processConsumeResult(msgs, status, context, this);
} else {
// Nothing left to consume: end the loop.
continueConsume = false;
}
}
} else {
if (this.processQueue.isDropped()) {
log.warn("the message queue not be able to consume, because it's dropped. {}", this.messageQueue);
return;
}
// The process queue is not locked or its lock has expired: try to lock the queue again and
// re-submit the consume request (delay argument 100).
ConsumeMessageOrderlyService.this.tryLockLaterAndReconsume(this.messageQueue, this.processQueue, 100);
}
}
}
}
Step1:首先获取消息队列锁对象,保证同一时间只有一个线程来消费该队列,这样才可以有序。
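Step2:判断是否满足(广播模式)或者(集群模式 && Broker消息队列分布式锁有效,即processQueue已锁定且锁未过期)。满足才进入消费循环;否则会发送请求去broker锁定当前队列,然后重新提交消费请求。
Step3:在消费循环中,如果是集群模式,每次取消息前都会再次检查processQueue是否仍处于锁定状态且锁未过期;不满足则稍后重新去broker锁定队列并重新提交消费请求,本次循环结束。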
Step4:如果该线程执行超过60s了,重新提交消费请求后线程结束,交由其它线程执行。
Step5:从processQueue中顺序的取出消息,这里也是跟并发消费不一样的地方,并发消费在提交消费请求时就已经从processQueue中取出消息,在这里直接就可以消费。
Step6:消费之前先获取processQueue中的消费锁。
Step7:调用消费监听器消费。
Step8:释放消费锁。
Step9:处理消费结果,这里面也跟并发消费不太一样
/**
 * Applies the outcome of one orderly consume batch: commits or rolls back the batch in the
 * process queue, schedules a retry where needed, updates statistics and stores the committed offset.
 *
 * @param msgs           the batch that was handed to the listener
 * @param status         status of the batch (must not be {@code null}; it is switched on)
 * @param context        consume context, providing the auto-commit flag and the suspend time
 * @param consumeRequest the request that consumed the batch
 * @return {@code true} if the calling loop should go on consuming, {@code false} if a retry
 *         was scheduled and the current run should stop
 */
public boolean processConsumeResult(
    final List<MessageExt> msgs,
    final ConsumeOrderlyStatus status,
    final ConsumeOrderlyContext context,
    final ConsumeRequest consumeRequest
) {
    boolean keepConsuming = true;
    long offsetToCommit = -1L;

    if (!context.isAutoCommit()) {
        // Manual commit: the offset only advances on an explicit COMMIT.
        switch (status) {
            case SUCCESS:
                this.getConsumerStatsManager().incConsumeOKTPS(consumerGroup, consumeRequest.getMessageQueue().getTopic(), msgs.size());
                break;
            case COMMIT:
                offsetToCommit = consumeRequest.getProcessQueue().commit();
                break;
            case ROLLBACK:
                consumeRequest.getProcessQueue().rollback();
                this.resubmitAfterSuspend(consumeRequest, context);
                keepConsuming = false;
                break;
            case SUSPEND_CURRENT_QUEUE_A_MOMENT:
                this.getConsumerStatsManager().incConsumeFailedTPS(consumerGroup, consumeRequest.getMessageQueue().getTopic(), msgs.size());
                if (checkReconsumeTimes(msgs)) {
                    consumeRequest.getProcessQueue().makeMessageToCosumeAgain(msgs);
                    this.resubmitAfterSuspend(consumeRequest, context);
                    keepConsuming = false;
                }
                break;
            default:
                break;
        }
    } else {
        // Auto commit.
        switch (status) {
            case COMMIT:
            case ROLLBACK:
                log.warn("the message queue consume result is illegal, we think you want to ack these message {}",
                    consumeRequest.getMessageQueue());
                // intentional fall-through: treated like SUCCESS
            case SUCCESS:
                // Commit the batch in the process queue and remember the offset to store.
                offsetToCommit = consumeRequest.getProcessQueue().commit();
                this.getConsumerStatsManager().incConsumeOKTPS(consumerGroup, consumeRequest.getMessageQueue().getTopic(), msgs.size());
                break;
            case SUSPEND_CURRENT_QUEUE_A_MOMENT:
                this.getConsumerStatsManager().incConsumeFailedTPS(consumerGroup, consumeRequest.getMessageQueue().getTopic(), msgs.size());
                // NOTE(review): per the original author's notes, checkReconsumeTimes is true while
                // the batch should still be retried and false once it was forwarded to the
                // dead-letter queue — the method is not shown here.
                if (checkReconsumeTimes(msgs)) {
                    // Put the batch back so the same messages are taken again, and retry later;
                    // the current run stops here to keep the order.
                    consumeRequest.getProcessQueue().makeMessageToCosumeAgain(msgs);
                    this.resubmitAfterSuspend(consumeRequest, context);
                    keepConsuming = false;
                } else {
                    // No retry: commit the batch and go on with the following messages.
                    offsetToCommit = consumeRequest.getProcessQueue().commit();
                }
                break;
            default:
                break;
        }
    }

    // Store the offset only when there is one to commit and the process queue was not dropped.
    if (offsetToCommit >= 0 && !consumeRequest.getProcessQueue().isDropped()) {
        this.defaultMQPushConsumerImpl.getOffsetStore().updateOffset(consumeRequest.getMessageQueue(), offsetToCommit, false);
    }
    return keepConsuming;
}

/** Re-submits the consume request after the suspend time supplied by the context. */
private void resubmitAfterSuspend(final ConsumeRequest consumeRequest, final ConsumeOrderlyContext context) {
    this.submitConsumeRequestLater(
        consumeRequest.getProcessQueue(),
        consumeRequest.getMessageQueue(),
        context.getSuspendCurrentQueueTimeMillis());
}
这里为了保证顺序性,如果消费失败,则需要将消息重新放回processQueue中msgTreeMap中原来的位置,挂起消费队列一会儿,稍后继续消费,这样取出来的消息还是这些消息,实现重试消息。
参考资料
《儒猿技术窝——从 0 开始带你成为消息中间件实战高手》