Lock-Free,无锁数据结构 & 无锁编程 Orocos — Multi Writer Single Reader Queue

15 篇文章 1 订阅
12 篇文章 5 订阅

无锁数据结构

无锁的数据结构应用越来越广泛,现在几乎所有的多核cpu都提供了 CAS 操作。

实现无锁的 Multi Writer Single Reader Queue 的关键还是实现指针的 CAS(gcc: __sync_val_compare_and_swap;vc++: InterlockedCompareExchange,作用于一个 mutable volatile long 变量)。

这里有一个 Single Writer Multi Reader 的无锁实现 see link

实现过程:

创建一个数组 _buf = new C[_size] 表示一个队列,成员变量 _indxes 保存这个队列当前的索引值(其中 _indxes._value 与 _index[0]、_index[1] 共用同一块内存:_index[0] 是写索引,指向下一个可写入的空位置;_index[1] 是读索引,指向下一个待出队的元素)。在 enqueue(入队)和 dequeue(出队)过程中,用局部变量 oldval 和 newval 分别保存当前索引值和要设置的新索引值,然后原子地将新的索引值写入当前索引(os::CAS(&_indxes._value, oldval._value, newval._value)),源码如下:

/**
 * Fixed-capacity queue built on a circular buffer whose read and write
 * indexes are updated with os::CAS instead of a lock.
 *
 * Per the comments in this class, several threads may enqueue concurrently,
 * but only one thread may dequeue (advance_r() is single-reader).
 *
 * A slot holding 0 means "empty / not yet written": enqueue() rejects a
 * value that compares equal to 0, clear() zeroes every slot, and the reader
 * zeroes a slot after taking its value. T must therefore be assignable from
 * 0 and testable with operator! (e.g. a pointer type).
 */
template<class T>
class AtomicMWSRQueue
{
    //typedef _T* T;
    /** Number of slots in _buf; the constructor sets it to the requested size + 1. */
    const int _size;
    typedef T C;
    typedef volatile C* CachePtrType;
    // CacheObjType, ValueType and PtrType are not used inside this class body.
    typedef C* volatile CacheObjType;
    typedef C ValueType;
    typedef C* PtrType;

    /**
     * Both read and write pointer are in a union.
     * This means a read's cas may fail because a
     * write happened (preemption). An implementation
     * with 2 distinct read/write pointers would not
     * suffer from this.
     *
     * Overlaying _value on _index lets a single read or write of _value
     * fetch or update _index[0] (write index) and _index[1] (read index)
     * together.
     *
     * NOTE(review): _index is unsigned short, so a _size beyond the range of
     * unsigned short looks unsupported - confirm against callers.
     * NOTE(review): where unsigned long is wider than two unsigned shorts,
     * _value covers more bytes than _index - confirm os::CAS operates on the
     * intended width.
     */
    union SIndexes
    {
        unsigned long _value;
        unsigned short _index[2];
    };

    /**
     * The pointer to the buffer can be cached,
     * the contents are volatile.
     */
    CachePtrType _buf;

    /**
     * The indexes are packed into one double word.
     * Therefore the read and write index can be read and written atomically.
     */
    volatile SIndexes _indxes;

    /**
     * Atomic advance and wrap of the Write pointer.
     * Return the old position or zero if queue is full.
     *
     * Meant to be called by several writers concurrently: the loop retries
     * until os::CAS reports success, and each successful call moves
     * _index[0] forward by one slot (wrapping to 0 at _size).
     */
    CachePtrType advance_w()
    {
        SIndexes oldval, newval;  // two local snapshots of the packed indexes
        do
        {
            oldval._value = _indxes._value; /*Points to a free writable pointer.*/
            newval._value = oldval._value; /*Points to the next writable pointer.*/
            // check for full :
            // write index is one behind the read index, either directly or
            // across the wrap-around point.
            if ((newval._index[0] == newval._index[1] - 1) || (newval._index[0] == newval._index[1] + _size - 1))
            {
                return 0;
            }
            newval._index[0]++;
            if (newval._index[0] >= _size)
                newval._index[0] = 0;
            // if ptr is unchanged, replace it with newval.
        } while (!os::CAS(&_indxes._value, oldval._value, newval._value));
        // from here on :
        // oldval is 'unique', other preempting threads
        // will have a different value for oldval, as
        // wptr advances. As long as oldval has not been written,
        // rptr will not advance and wptr will remain stuck behind it.
        // return the old position to write to :
        return &_buf[oldval._index[0]];  // oldval._index[0] ranges over 0 .. _size - 1
    }

    /**
     * Advance and wrap of the Read pointer.
     * Only one thread may call this.
     *
     * @param result Receives the content of the slot at the read index.
     *        It is assigned even when the function returns false.
     * @return false if the slot at the read index holds 0 (nothing written
     *         there yet), true once the value was taken and the read index
     *         advanced.
     */
    bool advance_r(T& result)
    {
        SIndexes oldval, newval;
        // read it:
        oldval._value = _indxes._value;
        result = _buf[oldval._index[1]];
        // return it if not yet written:
        if ( !result )
            return false;
        // got it, clear field.
        _buf[oldval._index[1]] = 0;

        // move pointer:
        do
        {
            // re-read indxes, since we are the only reader,
            // _index[1] will not have changed since entry of this function
            // After a failed CAS this re-read discards the previous
            // ++newval._index[1], so the increment is redone below.
            oldval._value = _indxes._value;
            newval._value = oldval._value;
            ++newval._index[1];
            if (newval._index[1] >= _size)
                newval._index[1] = 0;

            // we need to CAS since the write pointer may have moved.
            // this moves read pointer only:
            // (retry when a writer changed the packed indexes in the meantime)
        } while (!os::CAS(&_indxes._value, oldval._value, newval._value));

        return true;
    }

    // non-copyable !
    // (copy constructor declared private and not defined in this class body)
    AtomicMWSRQueue(const AtomicMWSRQueue<T>&);
public:
    typedef unsigned int size_type;

    /**
     * Create an AtomicMWSRQueue with queue size \a size.
     * @param size The size of the queue, should be 1 or greater.
     *
     * One extra slot is allocated (_size = size + 1); capacity() reports
     * _size - 1. The freshly allocated buffer is initialised through clear().
     */
    AtomicMWSRQueue(unsigned int size) :
        _size(size + 1)
    {
        _buf = new C[_size];
        this->clear();
    }

    /** Release the slot buffer allocated by the constructor. */
    ~AtomicMWSRQueue()
    {
        delete[] _buf;
    }

    /**
     * Inspect if the Queue is full.
     * @return true if full, false otherwise.
     */
    bool isFull() const
    {
        // two cases where the queue is full :
        // if wptr is one behind rptr or if wptr is at end
        // and rptr at beginning.
        SIndexes val;
        val._value = _indxes._value;
        return val._index[0] == val._index[1] - 1 || val._index[0] == val._index[1] + _size - 1;
    }

    /**
     * Inspect if the Queue is empty.
     * @return true if empty, false otherwise.
     */
    bool isEmpty() const
    {
        // empty if nothing to read.
        SIndexes val;
        val._value = _indxes._value;
        return val._index[0] == val._index[1];
    }

    /**
     * Return the maximum number of items this queue can contain.
     */
    size_type capacity() const
    {
        return _size - 1;
    }

    /**
     * Return the number of elements in the queue.
     */
    size_type size() const
    {
        SIndexes val;
        val._value = _indxes._value;
        // write index minus read index; negative when the write index has
        // wrapped past the end while the read index has not.
        int c = (val._index[0] - val._index[1]);
        return c >= 0 ? c : c + _size;
    }

    /**
     * Enqueue an item.
     * @param value The value to enqueue. A value equal to 0 is rejected,
     *        since 0 marks an empty slot.
     * @return false if queue is full (or value is 0), true if queued.
     */
    bool enqueue(const T& value)  // entry point for enqueueing
    {
        if (value == 0)
            return false;
        // Key step: atomically advance the write index, which reserves a
        // slot for this caller alone.
        CachePtrType loc = advance_w();
        if (loc == 0)
            return false;
        *loc = value;  // store the value in the reserved slot
        return true;
    }

    /**
     * Dequeue an item.
     * @param result Stores the dequeued value. It is unchanged when
     * dequeue returns false and contains the dequeued value
     * when it returns true.
     * @return false if queue is empty, true if result was written.
     */
    bool dequeue(T& result)  // entry point for dequeueing
    {
        // advance_r() assigns its argument even on failure, so a temporary
        // is used to leave result untouched in that case.
        T tmpresult;
        if (advance_r(tmpresult) ) {
            result = tmpresult;
            return true;
        }
        return false;
    }

    /**
     * Return the next to be read value.
     * The slot at the read index is returned as-is; no emptiness check is
     * made here.
     */
    const T front() const
    {
        return _buf[_indxes._index[1]];
    }

    /**
     * Clear all contents of the Queue and thus make it empty.
     * Every slot is set to 0 and both indexes are reset to 0.
     */
    void clear()
    {
        for (int i = 0; i != _size; ++i)
        {
            _buf[i] = 0;
        }
        _indxes._value = 0;
    }

};

注意:CAS 存在 ABA 的问题,不过上述代码中 CAS 的值是指针,如果只是应用无锁编程到一个queue中,则 ABA问题也不会带来严重的问题。但是处理ABA问题都比较棘手,现在也没有特别通用的解决方法,一般是通过特殊的硬件指令来处理。

reference link:
https://gcc.gnu.org/onlinedocs/gcc-4.4.1/gcc/Atomic-Builtins.html
http://www.ibm.com/developerworks/aix/library/au-multithreaded_structures2/
http://preshing.com/20120612/an-introduction-to-lock-free-programming/

About Lock-Free Programming

通常情况下,无锁的数据结构基本上只考虑数据的原子性(atomic,最基本的要求)。

但是无锁编程涉及的面更广(当然,只有在多线程程序中才需要讨论无锁的问题),因为其不仅仅涉及到数据的原子性问题,还涉及到reordering的问题(编译器优化,cache, CPU 优化等等);以及atomic数据和non-atomic数据的交互,以及由此带来的程序逻辑问题,处理不恰当会导致程序出现意想不到运行结果。

Lock-free programming 涉及到系统的内存模型。在 c++11 之前,c++ 规范没有清晰地定义内存模型,而内存模型是多线程程序能够正确运行的基础(因此,在 c++11 之前,多线程程序在没有使用 mutex 同步的情况下,很多场景会导致 Data Race,而其运行结果是未定义的)。有了内存模型,不管使用何种 cpu、何种编译器编译的多线程程序,只要考虑了 Sequential Consistency 的问题,就能够在各种平台准确无误地运行。

Sequential Consistency 问题关系到 memory reordering (包括编译器乱序 和 CPU 乱序), 而 memory reordering 就需要考虑 The Happens-Before Relation & The Synchronizes-With Relation.

总之,c++11有如下这些memory_order选项:

Memory order can be specified using the following enumeration:

// Memory-ordering constraints that can be passed to operations on atomic
// variables.
namespace std {
 typedef enum memory_order {
   memory_order_relaxed,  // all reorderings are okay
   memory_order_consume,  // potentially weaker form of acquire: orders the current load before operations that are data-dependent on it
   memory_order_acquire,  // subsequent loads are not moved before the current load or any preceding loads
   memory_order_release,  // preceding stores are not moved past the current store or any subsequent stores
   memory_order_acq_rel,  // combines the acquire and release guarantees
   memory_order_seq_cst   // default for all atomic operations; enforces sequential consistency
 } memory_order; 
}

The default for all operations on atomic variables is memory_order_seq_cst which, just like Java’s volatile, enforces sequential consistency. Other orderings are used to relax sequential consistency and often squeeze better performance from lock-free algorithms.

memory_order_acquire: guarantees that subsequent loads are not moved before the current load or any preceding loads.
memory_order_release: preceding stores are not moved past the current store or any subsequent stores.
memory_order_acq_rel: combines the two previous guarantees.
memory_order_consume: potentially weaker form of memory_order_acquire that enforces ordering of the current load before other operations that are data-dependent on it (for instance, when a load of a pointer is marked memory_order_consume, subsequent operations that dereference this pointer won’t be moved before it (yes, even that is not guaranteed on all platforms!)).
memory_order_relaxed: all reorderings are okay.

转载自:https://bartoszmilewski.com/2008/12/01/c-atomics-and-memory-ordering/

To be continued…

参考链接:

  • http://preshing.com/20120612/an-introduction-to-lock-free-programming/
  • http://preshing.com/20130618/atomic-vs-non-atomic-operations/
  • http://preshing.com/20120515/memory-reordering-caught-in-the-act/
  • http://preshing.com/20130823/the-synchronizes-with-relation/
  • http://preshing.com/20130702/the-happens-before-relation/
  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
要在 CPP 中使用 Orocos KDL 和 KDL Parser,需要先安装 Orocos KDL 库和 KDL Parser 库。下面是安装步骤: 1. 安装 Orocos KDL 库: ``` sudo apt-get install liborocos-kdl-dev ``` 2. 安装 KDL Parser 库: ``` sudo apt-get install libkdl-parser-dev ``` 安装完成后,可以在 CPP 中使用这两个库。使用 Orocos KDL 和 KDL Parser 的示例代码如下: ```cpp #include <kdl/chain.hpp> #include <kdl/chainfksolver.hpp> #include <kdl/chainfksolverpos_recursive.hpp> #include <kdl/chainjnttojacsolver.hpp> #include <kdl_parser/kdl_parser.hpp> int main(int argc, char** argv) { // Load the robot description from the parameter server. std::string robot_description; ros::param::get("robot_description", robot_description); // Parse the robot description into a KDL tree. KDL::Tree robot_kdl; if (!kdl_parser::treeFromString(robot_description, robot_kdl)) { ROS_ERROR("Failed to construct KDL tree from robot description."); return 1; } // Create a solver for computing the forward kinematics of the robot. KDL::Chain robot_chain; robot_kdl.getChain("base_link", "end_effector_link", robot_chain); KDL::ChainFkSolverPos_recursive fk_solver(robot_chain); // Compute the forward kinematics of the robot for a given joint configuration. KDL::JntArray joint_positions(robot_chain.getNrOfJoints()); for (size_t i = 0; i < robot_chain.getNrOfJoints(); ++i) { joint_positions(i) = i * 0.1; } KDL::Frame end_effector_pose; fk_solver.JntToCart(joint_positions, end_effector_pose); // Create a solver for computing the Jacobian of the robot. KDL::ChainJntToJacSolver jac_solver(robot_chain); // Compute the Jacobian of the robot for a given joint configuration. KDL::Jacobian jacobian; jac_solver.JntToJac(joint_positions, jacobian); return 0; } ``` 这段代码演示了如何使用 Orocos KDL 和 KDL Parser 实现机器人的正运动学和雅克比矩阵计算。需要注意的是,这段代码是在 ROS 中编写的,如果在其他环境中使用,需要根据需要进行修改。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值