前言
当queue数量或consumer数量发生弹性变化时,需要对consumer客户端进行Rebalance(重平衡),重新分配consumer与queue之间的对应关系。RebalanceLockManager负责在重平衡过程中,管理queue与consumer客户端之间的锁定关系。
源码版本:4.9.3
源码架构图
核心数据结构
RebalanceLockManager重平衡锁定组件,在内存中设计了一个table核心数据结构,Map<consumerGroup, Map<MessageQueue, LockEntry>>,维护了指定consumerGroup下所有队列的锁定状态。
// Core data structure: consumer group -> (MessageQueue -> LockEntry).
// Each MessageQueue of each consumer group has its own lock record.
private final ConcurrentMap<String/* group */, ConcurrentHashMap<MessageQueue, LockEntry>> mqLockTable =
    new ConcurrentHashMap<>(1024);
核心数据行为
锁定、批量锁定、批量释放这三个主要行为,都已在下面的源码中添加了注释:
/**
 * Keeps track of which client holds the rebalance lock on each message queue,
 * grouped by consumer group.
 *
 * <p>All structural changes to the lock table (creating, taking over and removing
 * entries) are performed while holding a single {@link ReentrantLock}. The
 * fast-path check {@link #isLocked(String, MessageQueue, String)} reads the table
 * without that lock, which is why the fields of {@link LockEntry} are volatile.
 */
public class RebalanceLockManager {
    private static final InternalLogger log = InternalLoggerFactory.getLogger(LoggerName.REBALANCE_LOCK_LOGGER_NAME);
    // Maximum time (ms) a lock stays valid without being refreshed; defaults to 60000 (60s)
    // and can be overridden with the system property below.
    private final static long REBALANCE_LOCK_MAX_LIVE_TIME = Long.parseLong(System.getProperty(
        "rocketmq.broker.rebalance.lockMaxLiveTime", "60000"));
    // Global lock guarding every modification of mqLockTable.
    private final Lock lock = new ReentrantLock();
    // Core data structure: consumer group -> (MessageQueue -> LockEntry).
    // Each MessageQueue of each consumer group has its own lock record.
    private final ConcurrentMap<String/* group */, ConcurrentHashMap<MessageQueue, LockEntry>> mqLockTable =
        new ConcurrentHashMap<String, ConcurrentHashMap<MessageQueue, LockEntry>>(1024);

    /**
     * Tries to lock a single queue of a consumer group for the given client.
     *
     * @param group    consumer group name
     * @param mq       queue to lock
     * @param clientId id of the client requesting the lock
     * @return {@code true} if the client holds the lock after this call (it already
     *         held it, the queue was free, or the previous holder's lock had expired);
     *         {@code false} if another client holds a non-expired lock, or if the
     *         calling thread was interrupted while waiting for the global lock
     */
    public boolean tryLock(final String group, final MessageQueue mq, final String clientId) {
        // Fast path: the client already holds a valid lock (its timestamp was just refreshed).
        if (this.isLocked(group, mq, clientId)) {
            return true;
        }

        try {
            this.lock.lockInterruptibly();
            try {
                ConcurrentHashMap<MessageQueue, LockEntry> groupValue = this.mqLockTable.get(group);
                if (null == groupValue) {
                    groupValue = new ConcurrentHashMap<>(32);
                    this.mqLockTable.put(group, groupValue);
                }

                LockEntry lockEntry = groupValue.get(mq);
                if (null == lockEntry) {
                    // Queue is not locked by anyone: create the entry for this client.
                    lockEntry = new LockEntry();
                    lockEntry.setClientId(clientId);
                    groupValue.put(mq, lockEntry);
                    log.info("tryLock, message queue not locked, I got it. Group: {} NewClientId: {} {}",
                        group,
                        clientId,
                        mq);
                }

                // Second check, now under the global lock: if this client holds the lock
                // (including the entry just created above), refresh the timestamp and succeed.
                if (lockEntry.isLocked(clientId)) {
                    lockEntry.setLastUpdateTimestamp(System.currentTimeMillis());
                    return true;
                }

                String oldClientId = lockEntry.getClientId();

                // The existing lock exceeded its maximum live time: take it over.
                if (lockEntry.isExpired()) {
                    lockEntry.setClientId(clientId);
                    lockEntry.setLastUpdateTimestamp(System.currentTimeMillis());
                    log.warn(
                        "tryLock, message queue lock expired, I got it. Group: {} OldClientId: {} NewClientId: {} {}",
                        group,
                        oldClientId,
                        clientId,
                        mq);
                    return true;
                }

                // Another client holds a still-valid lock.
                log.warn(
                    "tryLock, message queue locked by other client. Group: {} OtherClientId: {} NewClientId: {} {}",
                    group,
                    oldClientId,
                    clientId,
                    mq);
                return false;
            } finally {
                this.lock.unlock();
            }
        } catch (InterruptedException e) {
            log.error("tryLock exception", e);
            // Restore the interrupt flag for callers, and do not claim a lock that was never taken.
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Checks, without taking the global lock, whether the given client currently
     * holds a non-expired lock on the queue. When it does, the lock's last-update
     * timestamp is refreshed as a side effect.
     */
    private boolean isLocked(final String group, final MessageQueue mq, final String clientId) {
        ConcurrentHashMap<MessageQueue, LockEntry> groupValue = this.mqLockTable.get(group);
        if (groupValue != null) {
            LockEntry lockEntry = groupValue.get(mq);
            if (lockEntry != null) {
                boolean locked = lockEntry.isLocked(clientId);
                if (locked) {
                    // Renew the lock so it does not expire while the client keeps using it.
                    lockEntry.setLastUpdateTimestamp(System.currentTimeMillis());
                }
                return locked;
            }
        }
        return false;
    }

    /**
     * Tries to lock a set of queues of a consumer group for the given client.
     *
     * @param group    consumer group name
     * @param mqs      queues to lock
     * @param clientId id of the client requesting the locks
     * @return the subset of {@code mqs} that the client holds after this call; if the
     *         calling thread is interrupted while waiting for the global lock, only
     *         the queues that were already held by the client are returned
     */
    public Set<MessageQueue> tryLockBatch(final String group, final Set<MessageQueue> mqs,
        final String clientId) {
        Set<MessageQueue> lockedMqs = new HashSet<MessageQueue>(mqs.size());
        Set<MessageQueue> notLockedMqs = new HashSet<MessageQueue>(mqs.size());

        // Split the request into queues already held by this client and the rest,
        // so the global lock is only taken when there is something left to acquire.
        for (MessageQueue mq : mqs) {
            if (this.isLocked(group, mq, clientId)) {
                lockedMqs.add(mq);
            } else {
                notLockedMqs.add(mq);
            }
        }

        if (!notLockedMqs.isEmpty()) {
            try {
                this.lock.lockInterruptibly();
                try {
                    ConcurrentHashMap<MessageQueue, LockEntry> groupValue = this.mqLockTable.get(group);
                    if (null == groupValue) {
                        groupValue = new ConcurrentHashMap<>(32);
                        this.mqLockTable.put(group, groupValue);
                    }

                    for (MessageQueue mq : notLockedMqs) {
                        LockEntry lockEntry = groupValue.get(mq);
                        if (null == lockEntry) {
                            // Queue is not locked by anyone: create the entry for this client.
                            lockEntry = new LockEntry();
                            lockEntry.setClientId(clientId);
                            groupValue.put(mq, lockEntry);
                            log.info(
                                "tryLockBatch, message queue not locked, I got it. Group: {} NewClientId: {} {}",
                                group,
                                clientId,
                                mq);
                        }

                        // Re-check under the global lock: covers the entry just created above
                        // and an entry this client acquired since the unlocked pre-check.
                        if (lockEntry.isLocked(clientId)) {
                            lockEntry.setLastUpdateTimestamp(System.currentTimeMillis());
                            lockedMqs.add(mq);
                            continue;
                        }

                        String oldClientId = lockEntry.getClientId();

                        // Held by another client but expired: take it over.
                        if (lockEntry.isExpired()) {
                            lockEntry.setClientId(clientId);
                            lockEntry.setLastUpdateTimestamp(System.currentTimeMillis());
                            log.warn(
                                "tryLockBatch, message queue lock expired, I got it. Group: {} OldClientId: {} NewClientId: {} {}",
                                group,
                                oldClientId,
                                clientId,
                                mq);
                            lockedMqs.add(mq);
                            continue;
                        }

                        // Held by another client and still valid: leave it out of the result.
                        log.warn(
                            "tryLockBatch, message queue locked by other client. Group: {} OtherClientId: {} NewClientId: {} {}",
                            group,
                            oldClientId,
                            clientId,
                            mq);
                    }
                } finally {
                    this.lock.unlock();
                }
            } catch (InterruptedException e) {
                log.error("tryLockBatch exception", e);
                // Restore the interrupt flag so callers can observe the interruption.
                Thread.currentThread().interrupt();
            }
        }

        return lockedMqs;
    }

    /**
     * Releases the locks the given client holds on the given queues. Queues that are
     * not locked, or that are locked by a different client, are left untouched and
     * only logged.
     *
     * @param group    consumer group name
     * @param mqs      queues to unlock
     * @param clientId id of the client releasing the locks
     */
    public void unlockBatch(final String group, final Set<MessageQueue> mqs, final String clientId) {
        try {
            this.lock.lockInterruptibly();
            try {
                ConcurrentHashMap<MessageQueue, LockEntry> groupValue = this.mqLockTable.get(group);
                if (null != groupValue) {
                    for (MessageQueue mq : mqs) {
                        LockEntry lockEntry = groupValue.get(mq);
                        if (null != lockEntry) {
                            // Only the owning client may remove the lock entry.
                            if (lockEntry.getClientId().equals(clientId)) {
                                groupValue.remove(mq);
                                log.info("unlockBatch, Group: {} {} {}",
                                    group,
                                    mq,
                                    clientId);
                            } else {
                                log.warn("unlockBatch, but mq locked by other client: {}, Group: {} {} {}",
                                    lockEntry.getClientId(),
                                    group,
                                    mq,
                                    clientId);
                            }
                        } else {
                            log.warn("unlockBatch, but mq not locked, Group: {} {} {}",
                                group,
                                mq,
                                clientId);
                        }
                    }
                } else {
                    log.warn("unlockBatch, group not exist, Group: {} {}",
                        group,
                        clientId);
                }
            } finally {
                this.lock.unlock();
            }
        } catch (InterruptedException e) {
            log.error("unlockBatch exception", e);
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Lock record for one queue: who holds it and when it was last refreshed.
     */
    static class LockEntry {
        // Id of the client holding the lock. Volatile because it is written under the
        // manager's global lock but read without it by the unlocked isLocked check.
        private volatile String clientId;
        // Time (ms since epoch) the lock was last acquired or refreshed.
        private volatile long lastUpdateTimestamp = System.currentTimeMillis();

        public String getClientId() {
            return clientId;
        }

        public void setClientId(String clientId) {
            this.clientId = clientId;
        }

        public long getLastUpdateTimestamp() {
            return lastUpdateTimestamp;
        }

        public void setLastUpdateTimestamp(long lastUpdateTimestamp) {
            this.lastUpdateTimestamp = lastUpdateTimestamp;
        }

        /**
         * @return {@code true} if the given client owns this entry and it has not expired
         */
        public boolean isLocked(final String clientId) {
            boolean eq = this.clientId.equals(clientId);
            return eq && !this.isExpired();
        }

        /**
         * @return {@code true} if the entry has not been refreshed for longer than
         *         {@code REBALANCE_LOCK_MAX_LIVE_TIME} milliseconds
         */
        public boolean isExpired() {
            return (System.currentTimeMillis() - this.lastUpdateTimestamp) > REBALANCE_LOCK_MAX_LIVE_TIME;
        }
    }
}