zk实现分布式锁，InterProcessMutex源码解析

@snow peak

已于 2022-02-15 22:37:15 修改

阅读量340

点赞数 1

分类专栏： zookeeper 文章标签： zookeeper java 分布式

于 2022-02-15 22:28:16 首次发布

本文链接：https://blog.csdn.net/liu544059282/article/details/122950941

版权

zookeeper 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

工作中遇到一个使用分布式锁的场景，顺便研究了下InterProcessMutex锁的实现原理。

踩坑经历：

使用acquire 方法，未进行正确relase释放锁
高并发情况下，获取不到锁的线程会进行自旋操作，并在未超时的时间内持续执行业务代码。

API说明
InterProcessMutex有两个构造方法

public InterProcessMutex(CuratorFramework client, String path)
    {
        this(client, path, new StandardLockInternalsDriver());
    }

    public InterProcessMutex(CuratorFramework client, String path, LockInternalsDriver driver)
    {
        this(client, path, LOCK_NAME, 1, driver);
    }

参数说明如下

参数	说明
client	curator中zk客户端对象
path	抢锁路径，同一个锁path需一致
driver	可自定义lock驱动实现分布式锁

主要方法如下

//获取锁，若失败则阻塞等待直到成功，支持重入
public void acquire() throws Exception
//超时获取锁，超时失败
public boolean acquire(long time, TimeUnit unit) throws Exception
//释放锁
public void release() throws Exception

注意点，调用acquire()方法后需相应调用release()来释放锁

源码分析
从获取锁acquire()方法入手

public void acquire() throws Exception
    {
        if ( !internalLock(-1, null) )
        {
            throw new IOException("Lost connection while trying to acquire lock: " + basePath);
        }
    }

看到调用了internalLock方法，进到internalLock方法中

private boolean internalLock(long time, TimeUnit unit) throws Exception
    {
        /*
           Note on concurrency: a given lockData instance
           can be only acted on by a single thread so locking isn't necessary
        */

        Thread currentThread = Thread.currentThread();
        //先判断当前线程是否持有了锁，如果是，则加锁次数count+1，返回成功
        LockData lockData = threadData.get(currentThread);
        if ( lockData != null )
        {
            // re-entering
            lockData.lockCount.incrementAndGet();
            return true;
        }
        //调用LockInternals的attemptLock()方法进行加锁
        String lockPath = internals.attemptLock(time, unit, getLockNodeBytes());
        //加锁成功，则将当前线程对应加锁数据加到map中
        if ( lockPath != null )
        {
            LockData newLockData = new LockData(currentThread, lockPath);
            threadData.put(currentThread, newLockData);
            return true;
        }

        return false;
    }

进到LockInternals的attemptLock()中，看下代码

String attemptLock(long time, TimeUnit unit, byte[] lockNodeBytes) throws Exception
    {
        //开始时间，后面用做超时判断
        final long      startMillis = System.currentTimeMillis();
        //超时时间，转换为毫秒
        final Long      millisToWait = (unit != null) ? unit.toMillis(time) : null;
        //节点数据
        final byte[]    localLockNodeBytes = (revocable.get() != null) ? new byte[0] : lockNodeBytes;
        //重试次数
        int             retryCount = 0;
        //lockPath
        String          ourPath = null;
        //是否持有锁
        boolean         hasTheLock = false;
        //是否处理完成
        boolean         isDone = false;
        //循环处理
        while ( !isDone )
        {
            isDone = true;

            try
            {
                //在path下创建一个临时有序节点
                ourPath = driver.createsTheLock(client, path, localLockNodeBytes);
                //抢锁并判断是否拥有锁
                hasTheLock = internalLockLoop(startMillis, millisToWait, ourPath);
            }
            catch ( KeeperException.NoNodeException e )
            {
                // 重试范围内时进行重试
                if ( client.getZookeeperClient().getRetryPolicy().allowRetry(retryCount++, System.currentTimeMillis() - startMillis, RetryLoop.getDefaultRetrySleeper()) )
                {
                    isDone = false;
                }
                else
                {
                    throw e;
                }
            }
        }

        if ( hasTheLock )
        {
            return ourPath;
        }

        return null;
    }

创建临时有序节点createsTheLock方法如下，比较简单

public String createsTheLock(CuratorFramework client, String path, byte[] lockNodeBytes) throws Exception
    {
        String ourPath;
        if ( lockNodeBytes != null )
        {
            ourPath = client.create().creatingParentContainersIfNeeded().withProtection().withMode(CreateMode.EPHEMERAL_SEQUENTIAL).forPath(path, lockNodeBytes);
        }
        else
        {
            ourPath = client.create().creatingParentContainersIfNeeded().withProtection().withMode(CreateMode.EPHEMERAL_SEQUENTIAL).forPath(path);
        }
        return ourPath;
    }

判断是否拥有锁的方法internalLockLoop才是核心，下面注意了

private boolean internalLockLoop(long startMillis, Long millisToWait, String ourPath) throws Exception
    {
        boolean     haveTheLock = false;
        boolean     doDelete = false;
        try
        {
            if ( revocable.get() != null )
            {
                client.getData().usingWatcher(revocableWatcher).forPath(ourPath);
            }
            //自旋
            while ( (client.getState() == CuratorFrameworkState.STARTED) && !haveTheLock )
            {
                //获取path下对应临时有序节点，并按节点编号从小到大排序
                List<String>        children = getSortedChildren();
                //获取当前线程创建的临时节点名称
                String              sequenceNodeName = ourPath.substring(basePath.length() + 1); // +1 to include the slash
                //判断当前节点编号是否<maxLease,若是，则抢到了锁，maxLease这里为1，所以只有index为0时才抢到锁，标识只有1个线程能抢到锁
                PredicateResults    predicateResults = driver.getsTheLock(client, children, sequenceNodeName, maxLeases);
                if ( predicateResults.getsTheLock() )
                {
                    haveTheLock = true;
                }
                else
                {
                    //前一个节点编号较小的节点的路径
                    String  previousSequencePath = basePath + "/" + predicateResults.getPathToWatch();

                    synchronized(this)
                    {
                        try 
                        {
                            // use getData() instead of exists() to avoid leaving unneeded watchers which is a type of resource leak
                             //如果没抢到锁，监听前一个节点事件
                            client.getData().usingWatcher(watcher).forPath(previousSequencePath);
                            if ( millisToWait != null )
                            {
                                判断是否超时
                                millisToWait -= (System.currentTimeMillis() - startMillis);
                                startMillis = System.currentTimeMillis();
                                if ( millisToWait <= 0 )
                                {
                                    //超时 直接退出，并标记 删除节点doDelete标记=true
                                    doDelete = true;    // timed out - delete our node
                                    break;
                                }

                                wait(millisToWait);
                            }
                            else
                            {
                                //调用Object.wait()，等待线程被notify唤醒
                                wait();
                            }
                        }
                        catch ( KeeperException.NoNodeException e ) 
                        {
                            // it has been deleted (i.e. lock released). Try to acquire again
                        }
                    }
                }
            }
        }
        catch ( Exception e )
        {
            ThreadUtils.checkInterrupted(e);
            doDelete = true;
            throw e;
        }
        finally
        {
            //如果标记了删除，删除节点数据
            if ( doDelete )
            {
                deleteOurPath(ourPath);
            }
        }
        return haveTheLock;
    }

可以看到逻辑比较清晰，N个线程同时在path下创建临时顺序节点，编号最小的获取锁，没抢到锁的会调用wait()方法等待被唤醒

在监听器wacher里调用了notify()方法来唤醒其他节点，该监听器会在前一个(节点编号较小)的节点被删除后触发

先分析下释放锁的方法release
看下源码

public void release() throws Exception
    {
        /*
            Note on concurrency: a given lockData instance
            can be only acted on by a single thread so locking isn't necessary
         */

        Thread currentThread = Thread.currentThread();
        LockData lockData = threadData.get(currentThread);
        if ( lockData == null )
        {
            throw new IllegalMonitorStateException("You do not own the lock: " + basePath);
        }
        //如果锁被当前线程获取了超过1次，将count-1，直接返回
        int newLockCount = lockData.lockCount.decrementAndGet();
        if ( newLockCount > 0 )
        {
            return;
        }
        if ( newLockCount < 0 )
        {
            throw new IllegalMonitorStateException("Lock count has gone negative for lock: " + basePath);
        }
        try
        {
            //释放锁
            internals.releaseLock(lockData.lockPath);
        }
        finally
        {
            threadData.remove(currentThread);
        }
    }

最终调用releaseLock方法中的deleteOurPath中

void releaseLock(String lockPath) throws Exception
    {
        revocable.set(null);
        deleteOurPath(lockPath);
    }
    
    private void deleteOurPath(String ourPath) throws Exception
    {
        try
        {
        //直接调用client删除节点
            client.delete().guaranteed().forPath(ourPath);
        }
        catch ( KeeperException.NoNodeException e )
        {
            // ignore - already deleted (possibly expired session, etc.)
        }
    }

节点被删除后，会触发抢锁过程中的wather监听器，看下监听器中内容

private final Watcher watcher = new Watcher()
    {
        @Override
        public void process(WatchedEvent event)
        {
            notifyFromWatcher();
        }
    };
private synchronized void notifyFromWatcher()
    {
        notifyAll();
    }

可以看到节点path被删除后，会通知后面一个节点进行notify操作，notify操作后，重新进入while自旋中，重新判断是否抢到了锁