1. broker启动时,从文件中加载consumeOffset记录
org.apache.rocketmq.broker.BrokerController#initialize
// Broker startup entry point: wires up managers and loads persisted state.
// (Excerpt — unrelated initialization steps are elided with "...".)
public boolean initialize() throws CloneNotSupportedException {
...
// Load the consume-offset table from disk (config/consumerOffset.json).
result = result && this.consumerOffsetManager.load();
...
}
org.apache.rocketmq.common.ConfigManager#load
// Loads this manager's persisted configuration from its config file.
// Falls back to the .bak copy when the main file is missing/empty or
// when reading/decoding it fails. Returns true on success.
public boolean load() {
    String fileName = null;
    try {
        fileName = this.configFilePath();
        final String content = MixAll.file2String(fileName);
        // An absent or empty main file means we must try the backup copy.
        if (content == null || content.length() == 0) {
            return this.loadBak();
        }
        this.decode(content);
        log.info("load " + fileName + " OK");
        return true;
    } catch (Exception e) {
        log.error("load " + fileName + " failed, and try to load backup file", e);
        return this.loadBak();
    }
}
org.apache.rocketmq.broker.offset.ConsumerOffsetManager#decode
// Restores the in-memory offsetTable from its persisted JSON form.
// A null input or a failed deserialization leaves the table untouched.
public void decode(String jsonString) {
    // Nothing to restore when no persisted JSON is available.
    if (jsonString == null) {
        return;
    }
    final ConsumerOffsetManager loaded =
        RemotingSerializable.fromJson(jsonString, ConsumerOffsetManager.class);
    if (loaded == null) {
        return;
    }
    // Adopt the deserialized table wholesale.
    this.offsetTable = loaded.offsetTable;
}
加载磁盘文件:rootDir + File.separator + "config" + File.separator + "consumerOffset.json";
然后赋值给offsetTable
文件内容为JSON格式,即offsetTable的序列化结果(key为"consumerGroup@topic",value为queueId到offset的映射)。
2. consumer重平衡messageQueue后获取consumeOffset
集群消费模式下,consumer端的offsetStore是RemoteBrokerOffsetStore
org.apache.rocketmq.client.impl.consumer.DefaultMQPushConsumerImpl#start
// Choose the offset store: an explicitly configured store wins; otherwise
// pick by message model — BROADCASTING persists progress locally,
// CLUSTERING persists it on the broker.
if (this.defaultMQPushConsumer.getOffsetStore() != null) {
    this.offsetStore = this.defaultMQPushConsumer.getOffsetStore();
} else {
    switch (this.defaultMQPushConsumer.getMessageModel()) {
        case BROADCASTING:
            this.offsetStore = new LocalFileOffsetStore(this.mQClientFactory, this.defaultMQPushConsumer.getConsumerGroup());
            break;
        case CLUSTERING:
            this.offsetStore = new RemoteBrokerOffsetStore(this.mQClientFactory, this.defaultMQPushConsumer.getConsumerGroup());
            break;
        default:
            break;
    }
    // Publish the chosen store back onto the consumer facade.
    this.defaultMQPushConsumer.setOffsetStore(this.offsetStore);
}
org.apache.rocketmq.client.impl.consumer.RebalanceImpl#updateProcessQueueTableInRebalance
// Reconciles this consumer's ProcessQueue table with the message-queue set
// produced by the latest rebalance. (Excerpt: removal of no-longer-owned
// queues and PullRequest construction are elided with ".."/"...".)
private boolean updateProcessQueueTableInRebalance(final String topic, final Set<MessageQueue> mqSet,
final boolean isOrder) {
..
// The elided code above dropped the message queues that are no longer
// assigned to this consumer; below we handle the newly assigned ones.
List<PullRequest> pullRequestList = new ArrayList<PullRequest>();
for (MessageQueue mq : mqSet) {
if (!this.processQueueTable.containsKey(mq)) {
// Orderly consumption requires locking the queue on the broker first;
// skip the queue for this round if the lock cannot be taken.
if (isOrder && !this.lock(mq)) {
log.warn("doRebalance, {}, add a new mq failed, {}, because lock failed", consumerGroup, mq);
continue;
}
// Discard any stale in-memory consume progress for this queue.
this.removeDirtyOffset(mq);
ProcessQueue pq = new ProcessQueue();
// Work out where consumption of the newly assigned queue should start.
long nextOffset = -1L;
try {
// Compute the initial consume offset (may query the broker).
nextOffset = this.computePullFromWhereWithException(mq);
} catch (Exception e) {
// NOTE(review): the failure is logged at INFO level and the exception
// itself is dropped — consider WARN with the cause attached.
log.info("doRebalance, {}, compute offset failed, {}", consumerGroup, mq);
continue;
}
...
}
}
// Hand the newly built PullRequests to the pull message service.
this.dispatchPullRequest(pullRequestList);
return changed;
}
org.apache.rocketmq.client.impl.consumer.RebalancePushImpl#computePullFromWhereWithException
// Computes the offset from which to start pulling for a newly assigned
// message queue, according to the consumer's ConsumeFromWhere policy.
//
// Returns the start offset (>= 0), or -1 when it could not be determined
// (the offset store reported an error). Throws MQClientException when
// querying the broker for a max/searched offset fails.
public long computePullFromWhereWithException(MessageQueue mq) throws MQClientException {
long result = -1;
// The configured starting policy; the default is
// ConsumeFromWhere.CONSUME_FROM_LAST_OFFSET.
final ConsumeFromWhere consumeFromWhere = this.defaultMQPushConsumerImpl.getDefaultMQPushConsumer().getConsumeFromWhere();
// In clustering mode this is a RemoteBrokerOffsetStore.
final OffsetStore offsetStore = this.defaultMQPushConsumerImpl.getOffsetStore();
switch (consumeFromWhere) {
// The first three constants are deprecated and handled exactly like
// CONSUME_FROM_LAST_OFFSET.
case CONSUME_FROM_LAST_OFFSET_AND_FROM_MIN_WHEN_BOOT_FIRST:
case CONSUME_FROM_MIN_OFFSET:
case CONSUME_FROM_MAX_OFFSET:
case CONSUME_FROM_LAST_OFFSET: {
// Read the consume progress from the broker:
//   >= 0 : a stored offset exists — resume from it
//   -1   : broker has no record for this group+topic+queueId
//   -2   : the read failed
long lastOffset = offsetStore.readOffset(mq, ReadOffsetType.READ_FROM_STORE);
if (lastOffset >= 0) {
result = lastOffset;
}
// First start, no offset on the broker: treated as an initial boot.
else if (-1 == lastOffset) {
// Retry topics always start from offset 0.
if (mq.getTopic().startsWith(MixAll.RETRY_GROUP_TOPIC_PREFIX)) {
result = 0L;
} else {
// "Last offset": start from the queue's current max offset.
try {
result = this.mQClientFactory.getMQAdminImpl().maxOffset(mq);
} catch (MQClientException e) {
log.warn("Compute consume offset from last offset exception, mq={}, exception={}", mq, e);
throw e;
}
}
} else {
// readOffset() signalled an error (-2); report -1 to the caller.
result = -1;
}
break;
}
// Start consuming from the smallest offset in the queue.
case CONSUME_FROM_FIRST_OFFSET: {
// An existing broker-stored offset still takes precedence; when a
// consume record exists, CONSUME_FROM_FIRST_OFFSET has no effect.
long lastOffset = offsetStore.readOffset(mq, ReadOffsetType.READ_FROM_STORE);
if (lastOffset >= 0) {
result = lastOffset;
}
// No record: this group+topic+queueId is being consumed for the first time.
else if (-1 == lastOffset) {
result = 0L;
}
// Read error (-2).
else {
result = -1;
}
break;
}
// Start consuming from the configured timestamp.
case CONSUME_FROM_TIMESTAMP: {
// Again an existing broker-stored offset takes precedence and makes
// CONSUME_FROM_TIMESTAMP ineffective.
long lastOffset = offsetStore.readOffset(mq, ReadOffsetType.READ_FROM_STORE);
if (lastOffset >= 0) {
result = lastOffset;
}
// First-time consumption for this group+topic+queueId.
else if (-1 == lastOffset) {
// Retry topics: start from the retry queue's max offset.
if (mq.getTopic().startsWith(MixAll.RETRY_GROUP_TOPIC_PREFIX)) {
try {
result = this.mQClientFactory.getMQAdminImpl().maxOffset(mq);
} catch (MQClientException e) {
log.warn("Compute consume offset from last offset exception, mq={}, exception={}", mq, e);
throw e;
}
} else {
// Normal topics: look up the offset closest to the configured
// consume timestamp (yyyyMMddHHmmss).
try {
long timestamp = UtilAll.parseDate(this.defaultMQPushConsumerImpl.getDefaultMQPushConsumer().getConsumeTimestamp(),
UtilAll.YYYYMMDDHHMMSS).getTime();
result = this.mQClientFactory.getMQAdminImpl().searchOffset(mq, timestamp);
} catch (MQClientException e) {
log.warn("Compute consume offset from last offset exception, mq={}, exception={}", mq, e);
throw e;
}
}
} else {
// Read error (-2).
result = -1;
}
break;
}
default:
break;
}
return result;
}
(以上为 RebalancePushImpl#computePullFromWhereWithException 的实现)
目前有效的consumeFromWhere是:
CONSUME_FROM_LAST_OFFSET:从最后一个offset消费,
CONSUME_FROM_FIRST_OFFSET:从最小的offset消费,
CONSUME_FROM_TIMESTAMP:从指定的时间处开始消费,
其他的consumeFromWhere都被标注了@Deprecated,认为他们等同于CONSUME_FROM_LAST_OFFSET
从上面的分析可以看出,不管设置的是哪一种consumeFromWhere,都会先从broker去查询当前的consumeOffset, 如果broker上有消费记录,就按之前的consumeOffset继续消费,
即consumeGroup+topic+queueId之前有过消费记录,此时consumeFromWhere设置是无效的,还是按之前的consumeOffset继续消费。
如果consumeGroup+topic+queueId是第一次去消费,那么这时才是按consumeFromWhere设置的去消费。
但是对于CONSUME_FROM_LAST_OFFSET有一个特殊情况是不生效的:新的consumeGroup+topic+queueId去消费时,如果这个queueId的offset=0的数据还在内存中,broker会返回0。为什么呢?
如果topic扩容,那必然会有一些新增的queueId, 这些queueId的消费记录是不存在的,如果按新消费的去处理,CONSUME_FROM_LAST_OFFSET时就会从最大的offset开始去消费,这就有可能会丢失一部分未消费的数据。对于这种情况(新消费,并且queueId的offset=0的数据还在内存中),broker返回offset=0,从头开始消费。
当然这种情况也常常导致topic数据量不大且未扩容情况下,CONSUME_FROM_LAST_OFFSET不生效。
3. broker响应consumer的consumeOffset查询
broker使用consumeManageProcessor去管理consumer
org.apache.rocketmq.broker.processor.ConsumerManageProcessor#queryConsumerOffset
// Answers a consumer's "where did I leave off?" query for
// group+topic+queueId. Returns SUCCESS with the stored offset, SUCCESS
// with 0 for small/fresh queues whose offset-0 message is still cached,
// or QUERY_NOT_FOUND for a genuinely new consumer group.
private RemotingCommand queryConsumerOffset(ChannelHandlerContext ctx, RemotingCommand request)
    throws RemotingCommandException {
    final RemotingCommand response =
        RemotingCommand.createResponseCommand(QueryConsumerOffsetResponseHeader.class);
    final QueryConsumerOffsetResponseHeader responseHeader =
        (QueryConsumerOffsetResponseHeader) response.readCustomHeader();
    final QueryConsumerOffsetRequestHeader requestHeader =
        (QueryConsumerOffsetRequestHeader) request
            .decodeCommandCustomHeader(QueryConsumerOffsetRequestHeader.class);

    // Look up the cached consume offset; a negative value means no progress
    // record exists for this group+topic+queueId combination.
    final long storedOffset =
        this.brokerController.getConsumerOffsetManager().queryOffset(
            requestHeader.getConsumerGroup(), requestHeader.getTopic(), requestHeader.getQueueId());
    if (storedOffset >= 0) {
        // Known consumer: hand back the recorded progress.
        responseHeader.setOffset(storedOffset);
        response.setCode(ResponseCode.SUCCESS);
        response.setRemark(null);
        return response;
    }

    // No stored progress: inspect the queue itself. getMinOffsetInQueue()
    // reads the consumeQueue cache (a fresh cache entry yields 0).
    final long minOffset =
        this.brokerController.getMessageStore().getMinOffsetInQueue(requestHeader.getTopic(),
            requestHeader.getQueueId());
    // When the queue starts at (or below) offset 0 AND the message at
    // offset 0 is NOT on-disk-only (i.e. still in memory/page cache), the
    // backlog is small: tell the consumer to start from 0. This deliberately
    // overrides CONSUME_FROM_LAST_OFFSET so small backlogs — and typically
    // freshly expanded queues — are read from the beginning rather than
    // skipping unconsumed messages. Note the short-circuit: the in-memory
    // check only runs when minOffset <= 0.
    if (minOffset <= 0
        && !this.brokerController.getMessageStore().checkInDiskByConsumeOffset(
            requestHeader.getTopic(), requestHeader.getQueueId(), 0)) {
        responseHeader.setOffset(0L);
        response.setCode(ResponseCode.SUCCESS);
        response.setRemark(null);
    } else {
        // Treated as a brand-new consumer group; the client maps this to -1
        // and then applies its configured ConsumeFromWhere policy.
        response.setCode(ResponseCode.QUERY_NOT_FOUND);
        response.setRemark("Not found, V3_0_6_SNAPSHOT maybe this group consumer boot first");
    }
    return response;
}
如果从offsetTable中能读取到offset,就代表是曾经有过消费记录,直接从offsetTable中读取到并返回就行。
从offsetTable读取不到时,queryOffset返回-1,就会去执行brokerController.getMessageStore().getMinOffsetInQueue(),去读取consumeQueue的最小offset,但读取时是从缓存consumeQueueTable中读取,读取不到时会新建一个缓存记录,对于新建的缓存记录,minOffset会是0,新建缓存记录失败时,minOffset=-1
当minOffset<=0时,区分两种情况:queueId中offset=0的数据还在内存中,就会返回0,此时可能会导致CONSUME_FROM_LAST_OFFSET不生效的情况,具体原因在上面2中已经分析了。
queueId中offset=0的数据不在内存中时,才会真正地认为是新消费。
4. broker校准PullRequest中的offset
未完待续
5. consumer定时更新更新consumeOffset
defaultMQPushConsumerImpl.start() --> mQClientFactory.start() -->> startScheduledTask()
// Starts the client instance's scheduled background tasks.
// (Excerpt: only the consume-offset persistence task is shown; other
// tasks are elided with "...".)
// Fix: removed a stray mis-encoded character ("å") that trailed the
// closing brace and would break compilation.
private void startScheduledTask() {
...
// Periodically persist every consumer's consume progress: initial delay
// 10 s, then every persistConsumerOffsetInterval milliseconds.
this.scheduledExecutorService.scheduleAtFixedRate(new Runnable() {
@Override
public void run() {
try {
MQClientInstance.this.persistAllConsumerOffset();
} catch (Exception e) {
// Keep the periodic task alive: log and swallow, never propagate.
log.error("ScheduledTask persistAllConsumerOffset exception", e);
}
}
}, 1000 * 10, this.clientConfig.getPersistConsumerOffsetInterval(), TimeUnit.MILLISECONDS);
}
默认5s持久化一次,
// Asks every registered consumer in this client instance to persist its
// consume progress.
// NOTE(review): the map appears to be keyed by consumer-group name, not
// topic — confirm against MQClientInstance's field declaration.
private void persistAllConsumerOffset() {
    for (Entry<String, MQConsumerInner> consumerEntry : this.consumerTable.entrySet()) {
        consumerEntry.getValue().persistConsumerOffset();
    }
}
DefaultMQPushConsumerImpl.persistConsumerOffset()
// Persists consume progress for exactly the message queues currently
// allocated to this consumer. Any failure is logged and swallowed so a
// single bad persist cannot break the periodic task.
public void persistConsumerOffset() {
    try {
        this.makeSureStateOK();
        // Snapshot the currently allocated queues (keys of the
        // ProcessQueue table) before handing them to the offset store.
        final Set<MessageQueue> allocated =
            new HashSet<MessageQueue>(this.rebalanceImpl.getProcessQueueTable().keySet());
        this.offsetStore.persistAll(allocated);
    } catch (Exception e) {
        log.error("group: " + this.defaultMQPushConsumer.getConsumerGroup() + " persistConsumerOffset exception", e);
    }
}
org.apache.rocketmq.client.consumer.store.RemoteBrokerOffsetStore#persistAll
// Pushes the in-memory offset of every still-allocated queue to the
// broker, and evicts local entries for queues this consumer no longer
// owns. A null/empty allocation set is a no-op.
public void persistAll(Set<MessageQueue> mqs) {
    if (null == mqs || mqs.isEmpty())
        return;
    // Queues tracked locally but no longer allocated to this consumer.
    final HashSet<MessageQueue> staleQueues = new HashSet<MessageQueue>();
    for (Map.Entry<MessageQueue, AtomicLong> entry : this.offsetTable.entrySet()) {
        final MessageQueue queue = entry.getKey();
        final AtomicLong tracked = entry.getValue();
        if (tracked == null) {
            continue;
        }
        if (!mqs.contains(queue)) {
            staleQueues.add(queue);
            continue;
        }
        // Still allocated: report the current progress to the broker.
        try {
            this.updateConsumeOffsetToBroker(queue, tracked.get());
            log.info("[persistAll] Group: {} ClientId: {} updateConsumeOffsetToBroker {} {}",
                this.groupName,
                this.mQClientFactory.getClientId(),
                queue,
                tracked.get());
        } catch (Exception e) {
            log.error("updateConsumeOffsetToBroker exception, " + queue.toString(), e);
        }
    }
    // Drop local entries for queues we no longer own.
    for (MessageQueue queue : staleQueues) {
        this.offsetTable.remove(queue);
        log.info("remove unused mq, {}, {}", queue, this.groupName);
    }
}
最终是将offsetTable发送到broker上
6. consumer拉取消息时顺带commitOffset
未完待续