1 Overview
在分布式系统中,通常会避免使用分布式锁。然而在某些场景下,还是存在对分布式锁的需求。跟普通锁相比,分布式锁需要面对的问题更多,例如怎样保证某个进程在持有锁时意外终止之后,其它进程也能够正常地获得锁等等。笔者认为一个比较好的分布式锁实现是Terracotta,但是这不是本文的重点,感兴趣的读者可以参考笔者的Terracotta in Action 系列文章(http://whitesock.iteye.com/blog/351780 , http://whitesock.iteye.com/blog/352876 , http://whitesock.iteye.com/blog/354587 )。
除了Terracotta,不少其它开源项目也声称支持分布式锁,例如ZooKeeper,JGroups和Hazelcast等。在这些项目中,笔者倾向于使用ZooKeeper。ZooKeeper在其官方文档的ZooKeeper Recipes and Solutions章节中介绍了一个分布式锁的实现,本文主要对该版本进行了改良。关于Hazelcast,笔者不得不说,其官方文档文字不少但却苍白,很多内容介绍的都是浅尝辄止,难道是强迫开发人员去仔细地阅读源码,或者参加其价格不菲的培训?
2 Implementation
首先,笔者希望分布式锁能够支持Java并发包中的Lock接口,并且最好是可重入的。此外,在某个进程持有分布式锁的过程中,如果不能保证该锁不会被其它进程同时持有(例如网络故障),那么至少应该能够通知锁的持有者,以便其采取相应的应对措施。以下是笔者对分布式锁的定义:
import java.util.concurrent.locks.Lock;
/**
 * A {@link Lock} intended to be shared between processes.
 *
 * <p>In addition to the plain {@code Lock} contract, a {@link Listener} can be
 * registered; it is the hook through which an implementation reports that it can
 * no longer guarantee exclusive ownership to the current holder.
 */
public interface DistributedLock extends Lock {

    /**
     * Returns the listener currently registered on this lock.
     *
     * @return the registered listener; what is returned when none was set is up to
     *         the implementation
     */
    Listener getListener();

    /**
     * Registers the listener to be called back by this lock.
     *
     * @param listener the listener to register
     */
    void setListener(Listener listener);

    /**
     * Callback invoked when exclusive ownership of a lock cannot be maintained.
     */
    interface Listener {

        /**
         * Called when the given lock has been aborted.
         *
         * @param lock the lock that raised the notification
         * @param e    the exception describing why the lock was aborted
         */
        void onAbort(DistributedLock lock, Exception e);
    }
}
其中Listener接口的作用是,在无法排它独占该锁时进行回调。接下来是笔者的两个实现的共通父类。
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
/**
 * Skeleton for {@link DistributedLock} implementations.
 *
 * <p>Every acquisition first takes a process-local {@link ReentrantLock}; only the
 * outermost acquisition (hold count 1) is forwarded to the subclass through the
 * {@code doXxx} hooks. Nested acquisitions by the owning thread therefore never reach
 * the remote lock service. If a hook fails or reports failure, the local lock is
 * released again before the call returns.
 */
public abstract class AbstractDistributedLock implements DistributedLock {

    /** Flag that subclasses may consult, e.g. to emit additional log output. */
    protected volatile boolean verbose;

    /** Listener to call back when the lock is aborted; may be {@code null}. */
    protected volatile Listener listener;

    /** Process-local lock guarding the remote lock and tracking reentrancy. */
    protected final ReentrantLock lock = new ReentrantLock();

    /** Acquires the remote lock, blocking until it is available. */
    protected abstract void doLock();

    /** Releases the remote lock. */
    protected abstract void doUnlock();

    /**
     * Tries to acquire the remote lock without waiting.
     *
     * @return {@code true} if the remote lock was acquired
     */
    protected abstract boolean doTryLock();

    /**
     * Acquires the remote lock, blocking until it is available or the thread is interrupted.
     *
     * @throws InterruptedException if the current thread is interrupted while waiting
     */
    protected abstract void doLockInterruptibly() throws InterruptedException;

    /**
     * Tries to acquire the remote lock within the given time.
     *
     * @param timeout maximum time to wait
     * @param unit    unit of {@code timeout}
     * @return {@code true} if the remote lock was acquired
     * @throws InterruptedException if the current thread is interrupted while waiting
     */
    protected abstract boolean doTryLock(long timeout, TimeUnit unit) throws InterruptedException;

    public boolean isVerbose() {
        return verbose;
    }

    public void setVerbose(boolean verbose) {
        this.verbose = verbose;
    }

    /** Returns whether the process-local lock is held by any thread. */
    public boolean isLocked() {
        return this.lock.isLocked();
    }

    /** Returns whether the process-local lock is held by the calling thread. */
    public boolean isHeldByCurrentThread() {
        return this.lock.isHeldByCurrentThread();
    }

    @Override
    public Listener getListener() {
        return this.listener;
    }

    @Override
    public void setListener(Listener listener) {
        this.listener = listener;
    }

    @Override
    public void lock() {
        this.lock.lock();
        if (this.lock.getHoldCount() > 1) {
            return; // reentrant acquisition: the remote lock is already held
        }

        boolean succeed = false;
        try {
            doLock();
            succeed = true;
        } finally {
            if (!succeed) {
                this.lock.unlock();
            }
        }
    }

    @Override
    public void lockInterruptibly() throws InterruptedException {
        this.lock.lockInterruptibly();
        if (this.lock.getHoldCount() > 1) {
            return; // reentrant acquisition: the remote lock is already held
        }

        boolean succeed = false;
        try {
            doLockInterruptibly();
            succeed = true;
        } finally {
            if (!succeed) {
                this.lock.unlock();
            }
        }
    }

    @Override
    public boolean tryLock() {
        if (!this.lock.tryLock()) {
            return false;
        }
        if (this.lock.getHoldCount() > 1) {
            return true; // reentrant acquisition: the remote lock is already held
        }

        boolean succeed = false;
        try {
            succeed = doTryLock();
        } finally {
            if (!succeed) {
                this.lock.unlock();
            }
        }
        return succeed;
    }

    @Override
    public boolean tryLock(long timeout, TimeUnit unit) throws InterruptedException {
        final long mark = System.nanoTime();
        if (!this.lock.tryLock(timeout, unit)) {
            return false;
        }
        if (this.lock.getHoldCount() > 1) {
            return true; // reentrant acquisition: the remote lock is already held
        }

        boolean succeed = false;
        try {
            // Only the time not already spent waiting for the local lock is left for the remote one.
            final long remaining = TimeUnit.NANOSECONDS.convert(timeout, unit) - (System.nanoTime() - mark);
            if (remaining >= 0) {
                succeed = doTryLock(remaining, TimeUnit.NANOSECONDS);
            }
        } finally {
            if (!succeed) {
                this.lock.unlock();
            }
        }
        return succeed;
    }

    /**
     * Releases one hold on this lock. The remote lock is released only when the
     * outermost hold is given up. Calling this from a thread that does not hold
     * the lock is a silent no-op.
     */
    @Override
    public void unlock() {
        if (!this.lock.isHeldByCurrentThread()) {
            return;
        }
        if (this.lock.getHoldCount() > 1) {
            // Nested unlock: give back one local hold so that the outermost unlock()
            // eventually sees a hold count of 1 and releases the remote lock.
            this.lock.unlock();
            return;
        }

        try {
            doUnlock();
        } finally {
            this.lock.unlock();
        }
    }

    /**
     * Conditions are not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Condition newCondition() {
        throw new UnsupportedOperationException();
    }
}
2.1 MySQL Named Lock
在讨论ZooKeeper的分布式锁实现之前,先介绍一下笔者基于MySQL Named Lock的一个实现。
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import javax.sql.DataSource;
import org.apache.commons.lang.builder.ToStringBuilder;
import org.apache.commons.lang.builder.ToStringStyle;
import org.apache.commons.lang.exception.NestableRuntimeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * {@link AbstractDistributedLock} backed by MySQL named locks
 * ({@code GET_LOCK} / {@code RELEASE_LOCK}).
 *
 * <p>A connection is obtained from the {@link DataSource} for every outermost acquisition
 * and kept until {@code doUnlock()}. When a listener is registered, a periodic task pings
 * that connection and calls the listener back if the ping fails while the lock is held.
 */
public final class MySQLNamedLock extends AbstractDistributedLock {

    private static final Logger LOGGER = LoggerFactory.getLogger(MySQLNamedLock.class);

    private String name;
    private DataSource dataSource;
    /** Delay between two connection validations, in milliseconds. */
    private long validationInterval = 1000L;
    private ScheduledExecutorService scheduler;
    /** Connection on which the named lock was taken; {@code null} while not locked. */
    private final AtomicReference<Connection> connection;
    /** Handle of the scheduled validation task; {@code null} when none is running. */
    private final AtomicReference<ScheduledFuture<?>> future;

    public MySQLNamedLock() {
        this(null, null, null);
    }

    /**
     * @param name       name passed to {@code GET_LOCK} / {@code RELEASE_LOCK}
     * @param dataSource source of the connection that will hold the lock
     * @param scheduler  executor used to run the periodic connection validation
     */
    public MySQLNamedLock(String name, DataSource dataSource, ScheduledExecutorService scheduler) {
        this.name = name;
        this.scheduler = scheduler;
        this.dataSource = dataSource;
        this.connection = new AtomicReference<Connection>();
        this.future = new AtomicReference<ScheduledFuture<?>>();
    }

    @Override
    public String toString() {
        return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
                .append("name", this.name).toString();
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public long getValidationInterval() {
        return validationInterval;
    }

    public void setValidationInterval(long interval) {
        this.validationInterval = interval;
    }

    public DataSource getDataSource() {
        return dataSource;
    }

    public void setDataSource(DataSource dataSource) {
        this.dataSource = dataSource;
    }

    public ScheduledExecutorService getScheduler() {
        return scheduler;
    }

    public void setScheduler(ScheduledExecutorService scheduler) {
        this.scheduler = scheduler;
    }

    /**
     * @throws NestableRuntimeException if the named lock could not be obtained
     */
    @Override
    protected void doLock() {
        // A false result must not be ignored: the caller would otherwise believe it owns the lock.
        if (!doTryLock(Integer.MAX_VALUE, TimeUnit.SECONDS)) {
            throw new NestableRuntimeException("failed to lock, name: " + this.name);
        }
    }

    /**
     * @throws NestableRuntimeException if the named lock could not be obtained
     */
    @Override
    protected void doLockInterruptibly() {
        if (!doTryLock(Integer.MAX_VALUE, TimeUnit.SECONDS)) {
            throw new NestableRuntimeException("failed to lock, name: " + this.name);
        }
    }

    @Override
    protected boolean doTryLock() {
        return doTryLock(0, TimeUnit.SECONDS);
    }

    /**
     * Issues {@code SELECT GET_LOCK(name, seconds)} on a fresh connection.
     *
     * @param timeout maximum time to wait; converted to whole seconds (truncating)
     * @param unit    unit of {@code timeout}
     * @return {@code true} only if the query returned 1
     * @throws NestableRuntimeException if obtaining the connection or running the query fails
     */
    @Override
    protected boolean doTryLock(long timeout, TimeUnit unit) {
        boolean succeed = false;
        ResultSet rs = null;
        PreparedStatement ps = null;
        try {
            this.connection.set(this.dataSource.getConnection());
            ps = this.connection.get().prepareStatement("SELECT GET_LOCK(?, ?)");
            ps.setString(1, this.name);
            // Clamp instead of casting blindly: a large timeout must not wrap around to a negative int.
            ps.setInt(2, (int) Math.min((long) Integer.MAX_VALUE, TimeUnit.SECONDS.convert(timeout, unit)));
            rs = ps.executeQuery();
            if (rs.next()) {
                final int r = rs.getInt(1);
                succeed = (!rs.wasNull() && r == 1);
            }
        } catch (Exception e) {
            throw new NestableRuntimeException("failed to lock, name: " + this.name, e);
        } finally {
            JdbcUtils.closeQuietly(rs);
            JdbcUtils.closeQuietly(ps);
            if (!succeed) {
                // The lock was not obtained, so doUnlock() will never run: give the connection back now.
                final Connection c = this.connection.getAndSet(null);
                if (c != null) {
                    JdbcUtils.closeQuietly(c);
                }
            }
        }

        // Only watch the connection when somebody is interested in the outcome.
        if (succeed && this.listener != null) {
            final long interval = this.validationInterval;
            this.future.set(this.scheduler.scheduleWithFixedDelay(new ValidationTask(), interval, interval, TimeUnit.MILLISECONDS));
        }

        return succeed;
    }

    /**
     * Stops the validation task, issues {@code SELECT RELEASE_LOCK(name)} and closes the connection.
     *
     * @throws NestableRuntimeException if the release query fails
     */
    @Override
    protected void doUnlock() {
        final ScheduledFuture<?> f = this.future.getAndSet(null);
        if (f != null) {
            f.cancel(true);
        }

        Integer r = null;
        ResultSet rs = null;
        PreparedStatement ps = null;
        try {
            ps = this.connection.get().prepareStatement("SELECT RELEASE_LOCK(?)");
            ps.setString(1, this.name);
            rs = ps.executeQuery();
            if (rs.next()) {
                r = rs.getInt(1);
                if (rs.wasNull()) {
                    r = null;
                }
            }

            // 1 means the lock was released; everything else deserves a warning.
            if (r == null) {
                LOGGER.warn("lock does NOT exist, name: {}", this.name);
            } else if (r == 0) {
                LOGGER.warn("lock was NOT accquired by current thread, name: {}", this.name);
            } else if (r != 1) {
                LOGGER.warn("failed to unlock, name: {}, result: {}", this.name, r);
            }
        } catch (Exception e) {
            throw new NestableRuntimeException("failed to unlock, name: " + this.name, e);
        } finally {
            JdbcUtils.closeQuietly(rs);
            JdbcUtils.closeQuietly(ps);
            JdbcUtils.closeQuietly(this.connection.getAndSet(null));
        }
    }

    /**
     * Periodic task that pings the connection holding the lock and notifies the
     * listener when the ping fails while the lock is still held.
     */
    private class ValidationTask implements Runnable {
        @Override
        public void run() {
            try {
                ((com.mysql.jdbc.Connection) connection.get()).ping();
            } catch (Exception e) {
                // A null connection means the lock was released in the meantime: nothing to report.
                if (isLocked() && listener != null && connection.get() != null) {
                    listener.onAbort(MySQLNamedLock.this, e);
                }

                throw new NestableRuntimeException(e); // Note: suppress subsequent executions
            }
        }
    }
}
需要注意的是,如果在该锁上注册了Listener,并且Connection在持有锁的过程中失效,那么该Listener会被回调。
2.2 ZooKeeper Lock
以下代码是笔者对ZooKeeper官方版本的改良:
import java.lang.management.ManagementFactory;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.commons.lang.builder.ToStringBuilder;
import org.apache.commons.lang.builder.ToStringStyle;
import org.apache.commons.lang.exception.NestableRuntimeException;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * {@link AbstractDistributedLock} built on ZooKeeper ephemeral sequential nodes.
 *
 * <p>Each contender creates a child named {@code <sessionId>-<sequence>} under
 * {@link #getDirectory() directory}. The contender owning the child with the lowest
 * sequence holds the lock; every other contender watches the child immediately
 * preceding its own and re-checks once that watch fires.
 *
 * <p>NOTE(review): children are recognised by their session-id prefix, so two lock
 * instances sharing one {@link ZooKeeper} client and one directory would presumably
 * pick up each other's node - confirm before using it that way.
 */
public final class ZooKeeperLock extends AbstractDistributedLock {

    private static final Logger LOGGER = LoggerFactory.getLogger(ZooKeeperLock.class);

    private String directory;
    private ZooKeeper zookeeper;
    /** Runtime MXBean name of this JVM; stored in the node data together with the thread id. */
    private final String processName;
    /** Locker of the current outermost acquisition. */
    private final AtomicReference<ZooKeeperLocker> locker;

    public ZooKeeperLock() {
        this(null, null);
    }

    /**
     * @param zookeeper client used for all node operations
     * @param directory path of the parent node under which the lock children are created
     */
    public ZooKeeperLock(ZooKeeper zookeeper, String directory) {
        this.zookeeper = zookeeper;
        this.directory = directory;
        this.locker = new AtomicReference<ZooKeeperLocker>();
        this.processName = ManagementFactory.getRuntimeMXBean().getName();
    }

    @Override
    public String toString() {
        return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
                .append("directory", this.directory).toString();
    }

    public String getDirectory() {
        return directory;
    }

    public void setDirectory(String directory) {
        this.directory = directory;
    }

    public ZooKeeper getZookeeper() {
        return zookeeper;
    }

    public void setZookeeper(ZooKeeper zookeeper) {
        this.zookeeper = zookeeper;
    }

    /**
     * @throws NestableRuntimeException if the lock could not be obtained
     */
    @Override
    protected void doLock() {
        // A false result must not be ignored: the caller would otherwise believe it owns the lock.
        if (!doTryLock(Integer.MAX_VALUE, TimeUnit.SECONDS)) {
            throw new NestableRuntimeException("failed to lock, directory: " + this.directory);
        }
    }

    /**
     * @throws NestableRuntimeException if the lock could not be obtained
     */
    @Override
    protected void doLockInterruptibly() {
        if (!doTryLock(Integer.MAX_VALUE, TimeUnit.SECONDS)) {
            throw new NestableRuntimeException("failed to lock, directory: " + this.directory);
        }
    }

    @Override
    protected boolean doTryLock() {
        return doTryLock(0, TimeUnit.SECONDS);
    }

    /**
     * Creates a fresh locker and tries to obtain the lock with it.
     *
     * @param timeout maximum time to wait
     * @param unit    unit of {@code timeout}
     * @return {@code true} if the lock was obtained within the timeout
     * @throws NestableRuntimeException wrapping any failure; if the cause is an
     *         {@link InterruptedException} the thread's interrupt flag is set again
     */
    @Override
    protected boolean doTryLock(long timeout, TimeUnit unit) {
        try {
            this.locker.set(new ZooKeeperLocker());
            return this.locker.get().lock(timeout, unit);
        } catch (InterruptedException e) {
            // Wrapping would otherwise hide the interruption from the caller.
            Thread.currentThread().interrupt();
            throw new NestableRuntimeException("failed to lock, directory: " + this.directory, e);
        } catch (Exception e) {
            throw new NestableRuntimeException("failed to lock, directory: " + this.directory, e);
        }
    }

    /**
     * @throws NestableRuntimeException wrapping any failure while deleting the node
     */
    @Override
    protected void doUnlock() {
        try {
            this.locker.get().unlock();
        } catch (Exception e) {
            throw new NestableRuntimeException("failed to unlock, directory: " + this.directory, e);
        } finally {
            this.locker.set(null);
        }
    }

    /**
     * State of one acquisition: the name of the child node created for it and the
     * latch used to wait for the watch on the preceding child.
     */
    private class ZooKeeperLocker implements Watcher {

        /** Name (without the directory) of this contender's child; {@code null} until created. */
        private volatile String name;
        /** Latch released by {@link #process(WatchedEvent)}; {@code null} while nobody waits. */
        private volatile CountDownLatch latch;

        @Override
        public void process(WatchedEvent event) {
            // Read the field once: the locking thread resets it to null concurrently.
            final CountDownLatch current = this.latch;
            if (current != null) {
                current.countDown();
            }

            if (isVerbose() && LOGGER.isInfoEnabled()) {
                LOGGER.info("received an event: {}", event);
            }
        }

        /**
         * Tries to obtain the lock, retrying after a connection loss while time remains.
         * On failure the child node (if one is known) is deleted again.
         *
         * @param timeout maximum time to wait
         * @param unit    unit of {@code timeout}
         * @return {@code true} if the lock was obtained
         * @throws Exception any ZooKeeper failure other than a connection loss, or an interruption
         */
        public boolean lock(long timeout, TimeUnit unit) throws Exception {
            boolean succeed = false;
            // Convert exactly once; the loop below keeps the remaining budget in nanoseconds.
            long remaining = TimeUnit.NANOSECONDS.convert(timeout, unit);
            try {
                do {
                    final long mark = System.nanoTime();
                    try {
                        succeed = doLock(remaining, TimeUnit.NANOSECONDS);
                        break;
                    } catch (KeeperException.ConnectionLossException e) {
                        remaining -= (System.nanoTime() - mark);
                        if (isVerbose() && LOGGER.isInfoEnabled()) {
                            LOGGER.info("connection was lost, directory: {}, name: {}, message: {}", new Object[]{directory, this.name, e.getMessage()});
                        }
                    }
                }
                while (remaining > 0);
            } finally {
                if (!succeed) { // Unlock quietly
                    try {
                        unlock();
                    } catch (Exception e) {
                        LOGGER.warn("failed to unlock, directory: " + directory + ", name: " + this.name, e);
                    }
                }
            }
            return succeed;
        }

        /**
         * Deletes this contender's child node, if one is known.
         *
         * @throws Exception any ZooKeeper failure other than the node being absent
         */
        public void unlock() throws Exception {
            final String child = this.name;
            if (child == null) {
                return; // no node was ever created, nothing to delete
            }
            try {
                zookeeper.delete(directory + "/" + child, -1);
            } catch (KeeperException.NoNodeException e) {
                LOGGER.warn("node does NOT exist, directory: {}, name: {}, message: {}", new Object[]{directory, child, e.getMessage()});
            } finally {
                this.name = null;
            }
        }

        /**
         * Runs the acquisition loop: ensure a child exists, then either own the lock
         * (lowest sequence) or wait for the preceding child to change.
         *
         * @param timeout maximum time to wait
         * @param unit    unit of {@code timeout}
         * @return {@code true} if this contender's child became the first one in time
         */
        private boolean doLock(long timeout, TimeUnit unit) throws Exception {
            boolean succeed = false;
            long remaining = TimeUnit.NANOSECONDS.convert(timeout, unit);
            do {
                final long mark = System.nanoTime();

                if (this.name == null) {
                    this.name = findOrCreateChild();
                }

                final List<String> children = zookeeper.getChildren(directory, false);
                if (children.isEmpty()) {
                    // Log before resetting so that the vanished name shows up in the message.
                    LOGGER.warn("could not find any child, directory: {}, name: {}", new Object[]{directory, this.name});
                    this.name = null;
                } else {
                    final SequenceComparator comparator = new SequenceComparator();
                    Collections.sort(children, comparator);
                    final int index = Collections.binarySearch(children, this.name, comparator);
                    if (index > 0) { // Not the first one
                        final CountDownLatch waiter = new CountDownLatch(1);
                        this.latch = waiter;
                        final String previous = children.get(index - 1);
                        final Stat stat = zookeeper.exists(directory + "/" + previous, this);
                        if (stat != null) {
                            waiter.await(remaining, TimeUnit.NANOSECONDS);
                        } else {
                            LOGGER.warn("could not find the previous child, directory: {}, name: {}", new Object[]{directory, this.name});
                        }
                        this.latch = null;
                    } else if (index == 0) {
                        final String owner = children.get(0);
                        if (this.name != null && owner != null && this.name.equals(owner)) {
                            succeed = true;
                        } else {
                            LOGGER.warn("the lock should be held by current thread, directory: {}, name: {}, owner: {}", new Object[]{directory, this.name, owner});
                        }
                    } else {
                        // Our child is no longer listed: forget it so that the next pass
                        // creates a new one instead of spinning until the timeout.
                        LOGGER.warn("could not find own child, directory: {}, name: {}", new Object[]{directory, this.name});
                        this.name = null;
                    }
                }

                remaining -= (System.nanoTime() - mark);
            } while (!succeed && remaining >= 0);
            return succeed;
        }

        /**
         * Returns the name of an existing child carrying this session's prefix, or
         * creates a new ephemeral sequential child and returns its name.
         */
        private String findOrCreateChild() throws Exception {
            // A child may already exist if an earlier create succeeded but its reply was lost.
            final String prefix = zookeeper.getSessionId() + "-";
            final List<String> children = zookeeper.getChildren(directory, false);
            for (String child : children) {
                if (child.startsWith(prefix)) {
                    if (isVerbose() && LOGGER.isInfoEnabled()) {
                        LOGGER.info("found a child, directory: {}, child: {}", new Object[]{directory, child});
                    }
                    return child;
                }
            }

            final String data = Thread.currentThread().getId() + "@" + processName;
            // Fixed charset so that the node data does not depend on the platform default.
            final String path = zookeeper.create(directory + "/" + prefix, data.getBytes("UTF-8"), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
            final String child = path.substring(path.lastIndexOf("/") + 1);
            if (isVerbose() && LOGGER.isInfoEnabled()) {
                LOGGER.info("created a child, directory: {}, path: {}", new Object[]{directory, child});
            }
            return child;
        }
    }

    /**
     * Orders child names by the integer that follows their last {@code '-'}.
     */
    private static class SequenceComparator implements Comparator<String> {
        @Override
        public int compare(String lhs, String rhs) {
            final int sequence1 = Integer.parseInt(lhs.substring(lhs.lastIndexOf('-') + 1));
            final int sequence2 = Integer.parseInt(rhs.substring(rhs.lastIndexOf('-') + 1));
            // Explicit comparison: subtraction could overflow for operands of opposite sign.
            return (sequence1 < sequence2) ? -1 : ((sequence1 == sequence2) ? 0 : 1);
        }
    }
}
ZooKeeperLock是fair的,并且在Node中保存的数据是线程ID,进程ID以及主机名。需要注意的是,应该为ZooKeeper部署集群,此外还需要保证传入ZooKeeperLock构造函数中的ZooKeeper实例已经跟Server建立了连接,否则zookeeper.getSessionId()会返回0,从而导致错误。
3 Disclaimer
笔者只对以上代码进行了简单的测试,因此可能存在错误,请慎重使用。如果发现问题,感谢反馈。