1 Overview
在分布式系统中,通常会避免使用分布式锁。然而在某些场景下,还是存在对分布式锁的需求。跟普通锁相比,分布式锁需要面对的问题更多,例如怎样保证某个进程在持有锁时意外终止之后,其它进程也能够正常地获得锁等等。笔者认为一个比较好的分布式锁实现是Terracotta,但是这不是本文的重点,感兴趣的读者可以参考笔者的Terracotta in Action 系列文章(http://whitesock.iteye.com/blog/351780 ,http://whitesock.iteye.com/blog/352876 , http://whitesock.iteye.com/blog/354587 )。
除了Terracotta,不少其它开源项目也声称支持分布式锁,例如ZooKeeper,JGroups和Hazelcast等。在这些项目中,笔者倾向于使用ZooKeeper。ZooKeeper在其官方文档的ZooKeeper Recipes and Solutions章节中介绍了一个分布式锁的实现,本文主要对该版本进行了改良。关于Hazelcast,笔者不得不说,其官方文档文字不少但却苍白,很多内容介绍的都是浅尝辄止,难道是强迫开发人员去仔细地阅读源码,或者参加其价格不菲的培训?
2 Implementation
首先,笔者希望分布式锁能够支持Java并发包中的Lock接口,并且最好是可重入的。此外,在某个进程持有分布式锁的过程中,如果不能保证该锁不会被其它进程同时持有(例如网络故障),那么至少应该能够通知锁的持有者,以便其采取相应的应对措施。以下是笔者对分布式锁的定义:
- import java.util.concurrent.locks.Lock;
/**
 * A {@link Lock} that is shared between processes and can notify a registered
 * {@link Listener} about an abnormal condition of the lock.
 */
public interface DistributedLock extends Lock {

    /**
     * Callback used to inform interested code that something went wrong with a lock.
     */
    interface Listener {

        /**
         * Invoked by a lock implementation to report an abnormal condition.
         *
         * @param lock the lock reporting the condition
         * @param e    the exception describing what went wrong
         */
        void onAbort(DistributedLock lock, Exception e);
    }

    /**
     * Registers the listener to be notified by this lock.
     *
     * @param listener the listener to register
     */
    void setListener(Listener listener);

    /**
     * Returns the listener currently registered on this lock.
     *
     * @return the registered listener
     */
    Listener getListener();
}
其中Listener接口的作用是,在无法排它独占该锁时进行回调。接下来是笔者的两个实现的共通父类。
- import java.util.concurrent.TimeUnit;
- import java.util.concurrent.locks.Condition;
- import java.util.concurrent.locks.ReentrantLock;
- public abstract class AbstractDistributedLock implements DistributedLock {
- //
- protected volatile boolean verbose;
- protected volatile Listener listener;
- protected final ReentrantLock lock = new ReentrantLock();
- //
- protected abstract void doLock();
- protected abstract void doUnlock();
- protected abstract boolean doTryLock();
- protected abstract void doLockInterruptibly() throws InterruptedException;
- protected abstract boolean doTryLock(long timeout, TimeUnit unit) throws InterruptedException;
- /**
- *
- */
- public boolean isVerbose() {
- return verbose;
- }
- public void setVerbose(boolean verbose) {
- this.verbose = verbose;
- }
- public boolean isLocked() {
- return this.lock.isLocked();
- }
- public boolean isHeldByCurrentThread() {
- return this.lock.isHeldByCurrentThread();
- }
- /**
- *
- */
- @Override
- public Listener getListener() {
- return this.listener;
- }
- @Override
- public void setListener(Listener listener) {
- this.listener = listener;
- }
- /**
- *
- */
- @Override
- public void lock() {
- //
- this.lock.lock();
- if(this.lock.getHoldCount() > 1) return;
- //
- boolean succeed = false;
- try {
- doLock();
- succeed = true;
- } finally {
- if(!succeed) {
- this.lock.unlock();
- }
- }
- }
- @Override
- public void lockInterruptibly() throws InterruptedException {
- //
- this.lock.lockInterruptibly();
- if(this.lock.getHoldCount() > 1) return;
- //
- boolean succeed = false;
- try {
- doLockInterruptibly();
- succeed = true;
- } finally {
- if(!succeed) {
- this.lock.unlock();
- }
- }
- }
- @Override
- public boolean tryLock() {
- //
- if(!this.lock.tryLock()) return false;
- if(this.lock.getHoldCount() > 1) return true;
- //
- boolean succeed = false;
- try {
- succeed = doTryLock();
- } finally {
- if(!succeed) {
- this.lock.unlock();
- }
- }
- return succeed;
- }
- @Override
- public boolean tryLock(long timeout, TimeUnit unit) throws InterruptedException {
- //
- final long mark = System.nanoTime();
- if(!this.lock.tryLock(timeout, unit)) return false;
- if(this.lock.getHoldCount() > 1) return true;
- //
- boolean succeed = false;
- try {
- timeout = TimeUnit.NANOSECONDS.convert(timeout, unit) - (System.nanoTime() - mark);
- if(timeout >= 0) {
- succeed = doTryLock(timeout, TimeUnit.NANOSECONDS);
- }
- } finally {
- if(!succeed) {
- this.lock.unlock();
- }
- }
- return succeed;
- }
- @Override
- public void unlock() {
- //
- if(!this.lock.isHeldByCurrentThread()) return;
- if(this.lock.getHoldCount() > 1) return;
- //
- try {
- doUnlock();
- } finally {
- this.lock.unlock();
- }
- }
- @Override
- public Condition newCondition() {
- throw new UnsupportedOperationException();
- }
- }
2.1 MySQL Named Lock
在讨论ZooKeeper的分布式锁实现之前,先介绍一下笔者基于MySQL Named Lock的一个实现。
- import java.sql.Connection;
- import java.sql.PreparedStatement;
- import java.sql.ResultSet;
- import java.util.concurrent.ScheduledExecutorService;
- import java.util.concurrent.ScheduledFuture;
- import java.util.concurrent.TimeUnit;
- import java.util.concurrent.atomic.AtomicReference;
- import javax.sql.DataSource;
- import org.apache.commons.lang.builder.ToStringBuilder;
- import org.apache.commons.lang.builder.ToStringStyle;
- import org.apache.commons.lang.exception.NestableRuntimeException;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- public final class MySQLNamedLock extends AbstractDistributedLock {
- //
- private static final Logger LOGGER = LoggerFactory.getLogger(MySQLNamedLock.class);
- //
- private String name;
- private DataSource dataSource;
- private long validationInterval = 1000L;
- private ScheduledExecutorService scheduler;
- private final AtomicReference<Connection> connection;
- private final AtomicReference<ScheduledFuture<?>> future;
- /**
- *
- */
- public MySQLNamedLock() {
- this(null, null, null);
- }
- public MySQLNamedLock(String name, DataSource dataSource, ScheduledExecutorService scheduler) {
- this.name = name;
- this.scheduler = scheduler;
- this.dataSource = dataSource;
- this.connection = new AtomicReference<Connection>();
- this.future = new AtomicReference<ScheduledFuture<?>>();
- }
- /**
- *
- */
- @Override
- public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("name", this.name).toString();
- }
- /**
- *
- */
- public String getName() {
- return name;
- }
- public void setName(String name) {
- this.name = name;
- }
- public long getValidationInterval() {
- return validationInterval;
- }
- public void setValidationInterval(long interval) {
- this.validationInterval = interval;
- }
- public DataSource getDataSource() {
- return dataSource;
- }
- public void setDataSource(DataSource dataSource) {
- this.dataSource = dataSource;
- }
- public ScheduledExecutorService getScheduler() {
- return scheduler;
- }
- public void setScheduler(ScheduledExecutorService scheduler) {
- this.scheduler = scheduler;
- }
- /**
- *
- */
- @Override
- protected void doLock() {
- doTryLock(Integer.MAX_VALUE, TimeUnit.SECONDS);
- }
- @Override
- protected void doLockInterruptibly() {
- doTryLock(Integer.MAX_VALUE, TimeUnit.SECONDS);
- }
- @Override
- protected boolean doTryLock() {
- return doTryLock(0, TimeUnit.SECONDS);
- }
- @Override
- protected boolean doTryLock(long timeout, TimeUnit unit) {
- //
- Integer r = null;
- ResultSet rs = null;
- PreparedStatement ps = null;
- try {
- this.connection.set(this.dataSource.getConnection());
- ps = this.connection.get().prepareStatement("SELECT GET_LOCK(?, ?)");
- ps.setString(1, this.name);
- ps.setInt(2, (int)TimeUnit.SECONDS.convert(timeout, unit));
- rs = ps.executeQuery();
- if(rs.next()) {
- r = rs.getInt(1);
- if(rs.wasNull()) r = null;
- }
- } catch(Exception e) {
- throw new NestableRuntimeException("failed to lock, name: " + this.name, e);
- } finally {
- JdbcUtils.closeQuietly(rs);
- JdbcUtils.closeQuietly(ps);
- }
- //
- final boolean succeed = (r != null && r == 1);
- if(succeed && this.listener != null) {
- final long interval = this.validationInterval;
- this.future.set(this.scheduler.scheduleWithFixedDelay(new ValidationTask(), interval, interval, TimeUnit.MILLISECONDS));
- }
- //
- return succeed;
- }
- @Override
- protected void doUnlock() {
- //
- final ScheduledFuture<?> f = this.future.getAndSet(null);
- if(f != null) f.cancel(true);
- //
- Integer r = null;
- ResultSet rs = null;
- PreparedStatement ps = null;
- try {
- //
- ps = this.connection.get().prepareStatement("SELECT RELEASE_LOCK(?)");
- ps.setString(1, this.name);
- rs = ps.executeQuery();
- if(rs.next()) {
- r = rs.getInt(1);
- if(rs.wasNull()) r = null;
- }
- //
- if(r == null) {
- LOGGER.warn("lock does NOT exist, name: {}", this.name);
- } else if(r == 0) {
- LOGGER.warn("lock was NOT accquired by current thread, name: {}", this.name);
- } else {
- LOGGER.warn("failed to unlock, name: {}, result: {}", this.name, r);
- }
- } catch(Exception e) {
- throw new NestableRuntimeException("failed to unlock, name: " + this.name, e);
- } finally {
- JdbcUtils.closeQuietly(rs);
- JdbcUtils.closeQuietly(ps);
- JdbcUtils.closeQuietly(this.connection.getAndSet(null));
- }
- }
- /**
- *
- */
- private class ValidationTask implements Runnable {
- @Override
- public void run() {
- try {
- ((com.mysql.jdbc.Connection)connection.get()).ping();
- } catch(Exception e) {
- //
- if(isLocked() && listener != null && connection.get() != null) {
- listener.onAbort(MySQLNamedLock.this, e);
- }
- //
- throw new NestableRuntimeException(e); // Note: suppress subsequent executions
- }
- }
- }
- }
需要注意的是,如果在该锁上注册了Listener,并且Connection在持有锁的过程中失效,那么该Listener会被回调。
2.2 ZooKeeper Lock
以下代码是笔者对ZooKeeper官方版本的改良:
- import java.lang.management.ManagementFactory;
- import java.util.Collections;
- import java.util.Comparator;
- import java.util.List;
- import java.util.concurrent.CountDownLatch;
- import java.util.concurrent.TimeUnit;
- import java.util.concurrent.atomic.AtomicReference;
- import org.apache.commons.lang.builder.ToStringBuilder;
- import org.apache.commons.lang.builder.ToStringStyle;
- import org.apache.commons.lang.exception.NestableRuntimeException;
- import org.apache.zookeeper.CreateMode;
- import org.apache.zookeeper.KeeperException;
- import org.apache.zookeeper.WatchedEvent;
- import org.apache.zookeeper.Watcher;
- import org.apache.zookeeper.ZooDefs;
- import org.apache.zookeeper.ZooKeeper;
- import org.apache.zookeeper.data.Stat;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- public final class ZooKeeperLock extends AbstractDistributedLock {
- //
- private static final Logger LOGGER = LoggerFactory.getLogger(ZooKeeperLock.class);
- //
- private String directory;
- private ZooKeeper zookeeper;
- private final String processName;
- private final AtomicReference<ZooKeeperLocker> locker;
- /**
- *
- */
- public ZooKeeperLock() {
- this(null, null);
- }
- public ZooKeeperLock(ZooKeeper zookeeper, String directory) {
- this.zookeeper = zookeeper;
- this.directory = directory;
- this.locker = new AtomicReference<ZooKeeperLocker>();
- this.processName = ManagementFactory.getRuntimeMXBean().getName();
- }
- /**
- *
- */
- @Override
- public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("directory", this.directory).toString();
- }
- /**
- *
- */
- public String getDirectory() {
- return directory;
- }
- public void setDirectory(String directory) {
- this.directory = directory;
- }
- public ZooKeeper getZookeeper() {
- return zookeeper;
- }
- public void setZookeeper(ZooKeeper zookeeper) {
- this.zookeeper = zookeeper;
- }
- /**
- *
- */
- @Override
- protected void doLock() {
- doTryLock(Integer.MAX_VALUE, TimeUnit.SECONDS);
- }
- @Override
- protected void doLockInterruptibly() {
- doTryLock(Integer.MAX_VALUE, TimeUnit.SECONDS);
- }
- @Override
- protected boolean doTryLock() {
- return doTryLock(0, TimeUnit.SECONDS);
- }
- @Override
- protected boolean doTryLock(long timeout, TimeUnit unit) {
- try {
- this.locker.set(new ZooKeeperLocker());
- return this.locker.get().lock(timeout, unit);
- } catch(Exception e) {
- throw new NestableRuntimeException("failed to lock, directory: " + this.directory, e);
- }
- }
- @Override
- protected void doUnlock() {
- try {
- this.locker.get().unlock();
- } catch(Exception e) {
- throw new NestableRuntimeException("failed to unlock, directory: " + this.directory, e);
- } finally {
- this.locker.set(null);
- }
- }
- /**
- *
- */
- private class ZooKeeperLocker implements Watcher {
- //
- private volatile String name;
- private volatile CountDownLatch latch;
- /**
- *
- */
- @Override
- public void process(WatchedEvent event) {
- //
- if(this.latch != null) {
- this.latch.countDown();
- }
- //
- if(isVerbose() && LOGGER.isInfoEnabled()) {
- LOGGER.info("received an event: {}", event);
- }
- }
- public boolean lock(long timeout, TimeUnit unit) throws Exception {
- boolean succeed = false;
- try {
- do {
- final long mark = System.nanoTime();
- timeout = TimeUnit.NANOSECONDS.convert(timeout, unit);
- try {
- succeed = doLock(timeout, TimeUnit.NANOSECONDS);
- break;
- } catch (KeeperException.ConnectionLossException e) {
- timeout -= (System.nanoTime() - mark);
- if(isVerbose() && LOGGER.isInfoEnabled()) {
- LOGGER.info("connection was lost, directory: {}, name: {}, message: {}", new Object[]{directory, this.name, e.getMessage()});
- }
- }
- }
- while(timeout > 0);
- } finally {
- if(!succeed) { // Unlock quietly
- try {
- unlock();
- } catch(Exception e) {
- LOGGER.warn("failed to unlock, directory: " + directory + ", name: " + this.name, e);
- }
- }
- }
- return succeed;
- }
- public void unlock() throws Exception {
- try {
- zookeeper.delete(directory + "/" + this.name, -1);
- } catch (KeeperException.NoNodeException e) {
- LOGGER.warn("node does NOT exist, directory: {}, name: {}, message: {}", new Object[]{directory, this.name, e.getMessage()});
- } finally {
- this.name = null;
- }
- }
- /**
- *
- */
- private Boolean doLock(long timeout, TimeUnit unit) throws Exception {
- boolean succeed = false;
- do {
- //
- final long mark = System.nanoTime();
- timeout = TimeUnit.NANOSECONDS.convert(timeout, unit);
- //
- if (this.name == null) {
- this.name = findOrCreateChild();
- }
- //
- final List<String> children = zookeeper.getChildren(directory, false);
- if (children.isEmpty()) {
- this.name = null;
- LOGGER.warn("could not find any child, directory: {}, name: {}", new Object[]{directory, this.name});
- } else {
- final SequenceComparator comparator = new SequenceComparator();
- Collections.sort(children, comparator);
- final int index = Collections.binarySearch(children, this.name, comparator);
- if (index > 0) { // Not the first one
- this.latch = new CountDownLatch(1);
- final String previous = children.get(index - 1);
- final Stat stat = zookeeper.exists(directory + "/" + previous, this);
- if (stat != null) {
- this.latch.await(timeout, TimeUnit.NANOSECONDS);
- this.latch = null;
- } else {
- LOGGER.warn("could not find the previous child, directory: {}, name: {}", new Object[]{directory, this.name});
- }
- } else {
- final String owner = children.get(0);
- if (this.name != null && owner != null && this.name.equals(owner)) {
- succeed = true;
- } else {
- LOGGER.warn("the lock should be held by current thread, directory: {}, name: {}, owner: {}", new Object[]{directory, this.name, owner});
- }
- }
- }
- //
- timeout -= (System.nanoTime() - mark);
- } while (!succeed && timeout >= 0);
- return succeed;
- }
- private String findOrCreateChild() throws Exception {
- //
- final String prefix = zookeeper.getSessionId() + "-";
- final List<String> children = zookeeper.getChildren(directory, false);
- for (String child : children) {
- if (child.startsWith(prefix)) {
- if(isVerbose() && LOGGER.isInfoEnabled()) {
- LOGGER.info("found a child, directory: {}, child: {}", new Object[]{directory, child});
- }
- return child;
- }
- }
- //
- final String data = Thread.currentThread().getId() + "@" + processName;
- final String path = zookeeper.create(directory + "/" + prefix, data.getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
- final String child = path.substring(path.lastIndexOf("/") + 1);
- if(isVerbose() && LOGGER.isInfoEnabled()) {
- LOGGER.info("created a child, directory: {}, path: {}", new Object[]{directory, child});
- }
- return child;
- }
- }
- /**
- *
- */
- private static class SequenceComparator implements Comparator<String> {
- @Override
- public int compare(String lhs, String rhs) {
- final int index1 = lhs.lastIndexOf('-');
- final int index2 = rhs.lastIndexOf('-');
- final int sequence1 = Integer.parseInt(lhs.substring(index1 + 1));
- final int sequence2 = Integer.parseInt(rhs.substring(index2 + 1));
- return sequence1 - sequence2;
- }
- }
- }
ZooKeeperLock是fair的,并且在Node中保存的数据是线程ID,进程ID以及主机名。需要注意的是,应该为ZooKeeper部署集群,此外还需要保证传入ZooKeeperLock构造函数中的ZooKeeper实例已经跟Server建立了连接,否则zookeeper.getSessionId()会返回0,从而导致错误。
3 Disclaimer
笔者只对以上代码进行了简单的测试,因此可能存在错误,请慎重使用。如果发现问题,感谢反馈。