无锁编程之免锁

要保证线程安全，并不是一定就要进行同步，两者没有因果关系，即使涉及到共享数据，也未必需要加锁，大家往下看。

环形缓冲区是生产者和消费者模型中常用的数据结构。生产者将数据放入数组的尾端，而消费者从数组的另一端移走数据，当达到数组的尾部时，生产者绕回到数组的头部。

如图所示，当读指针和写指针相等时，表明缓冲区是空的；而当写入指针紧跟在读取指针后面（即再写入一个元素就会追上读取指针）时，表明缓冲区已满。

如果只有一个生产者和一个消费者，那么就可以做到免锁访问环形缓冲区（Ring Buffer）。写入索引只允许生产者访问并修改，只要写入者在更新索引之前将新的值保存到缓冲区中，则读者将始终看到一致的数据结构。同理，读取索引也只允许消费者访问并修改。

先贴代码，以下代码来自linux-3.7.1

调用关系：kfifo_put -> __kfifo_in_r -> kfifo_copy_in

/*
 * kfifo_copy_in - copy data from src into the fifo's data buffer.
 *
 * @fifo: fifo whose ->data buffer is written; ->mask and ->esize are read
 * @src:  source of the data to copy
 * @len:  amount to copy, in elements (scaled to bytes by esize below)
 * @off:  starting position, in elements; reduced with fifo->mask first
 *
 * The copy is split in two: the part that fits between the offset and the
 * end of the buffer, and the remainder, which wraps to the start of the
 * buffer. This function does not modify fifo->in; per the comment before
 * the barrier, the index is expected to be incremented afterwards
 * (presumably by the caller -- not visible here).
 */
static void kfifo_copy_in(struct __kfifo *fifo, const void *src,
       unsigned int len, unsigned int off)
{
   /* buffer capacity in elements: mask + 1 */
   unsigned int size = fifo->mask + 1;
   unsigned int esize = fifo->esize;
   unsigned int l;

   /* wrap the offset into the buffer */
   off &= fifo->mask;
   /* convert element counts to byte counts when elements are wider than 1 byte */
   if (esize != 1) {
       off *= esize;
       size *= esize;
       len *= esize;
   }
   /* l = number of bytes that fit before the end of the buffer */
   l = min(len, size - off);

   /* first part: from the offset up to the end of the buffer */
   memcpy(fifo->data + off, src, l);
   /* second part: whatever is left wraps to the start (len - l may be 0) */
   memcpy(fifo->data, src + l, len - l);
   /*
   * make sure that the data in the fifo is up to date before
   * incrementing the fifo->in index counter
   */
   smp_wmb();
}

/**
 * kfifo_put - put data into the fifo
 * @fifo: address of the fifo to be used
 * @val: pointer to the data to be added (it is dereferenced when stored)
 *
 * This macro copies the given value into the fifo.
 * It returns 0 if the fifo was full. Otherwise it returns the number
 * of processed elements.
 *
 * When the fifo has a non-zero record size the work is delegated to
 * __kfifo_in_r(); otherwise the element is stored directly at index
 * (in & mask), a write barrier is issued, and only then is the in
 * index incremented.
 *
 * Note that with only one concurrent reader and one concurrent
 * writer, you don't need extra locking to use this macro.
 *
 * Every line of the macro body must end in a backslash with no blank
 * lines in between, otherwise the definition is cut short.
 */
#define kfifo_put(fifo, val) \
({ \
    typeof((fifo) + 1) __tmp = (fifo); \
    typeof((val) + 1) __val = (val); \
    unsigned int __ret; \
    const size_t __recsize = sizeof(*__tmp->rectype); \
    struct __kfifo *__kfifo = &__tmp->kfifo; \
    if (0) { /* never executed: compile-time type check only */ \
        typeof(__tmp->ptr_const) __dummy __attribute__ ((unused)); \
        __dummy = (typeof(__val))NULL; \
    } \
    if (__recsize) \
        __ret = __kfifo_in_r(__kfifo, __val, sizeof(*__val), \
            __recsize); \
    else { \
        __ret = !kfifo_is_full(__tmp); \
        if (__ret) { \
            (__is_kfifo_ptr(__tmp) ? \
            ((typeof(__tmp->type))__kfifo->data) : \
            (__tmp->buf) \
            )[__kfifo->in & __tmp->kfifo.mask] = \
                *(typeof(__tmp->type))__val; \
            smp_wmb(); /* store the element before advancing in */ \
            __kfifo->in++; \
        } \
    } \
    __ret; \
})

/**
 * kfifo_get - get data from the fifo
 * @fifo: address of the fifo to be used
 * @val: address of the variable where the fetched data is stored
 *
 * This macro reads the data from the fifo.
 * It returns 0 if the fifo was empty. Otherwise it returns the number
 * of processed elements.
 *
 * When the fifo has a non-zero record size the work is delegated to
 * __kfifo_out_r(); otherwise the element at index (out & mask) is
 * copied to *val, a barrier is issued, and only then is the out index
 * incremented.
 *
 * Note that with only one concurrent reader and one concurrent
 * writer, you don't need extra locking to use this macro.
 *
 * Every line of the macro body must end in a backslash with no blank
 * lines in between, otherwise the definition is cut short.
 */
#define kfifo_get(fifo, val) \
__kfifo_uint_must_check_helper( \
({ \
    typeof((fifo) + 1) __tmp = (fifo); \
    typeof((val) + 1) __val = (val); \
    unsigned int __ret; \
    const size_t __recsize = sizeof(*__tmp->rectype); \
    struct __kfifo *__kfifo = &__tmp->kfifo; \
    if (0) /* never executed: compile-time type check only */ \
        __val = (typeof(__tmp->ptr))0; \
    if (__recsize) \
        __ret = __kfifo_out_r(__kfifo, __val, sizeof(*__val), \
            __recsize); \
    else { \
        __ret = !kfifo_is_empty(__tmp); \
        if (__ret) { \
            *(typeof(__tmp->type))__val = \
                (__is_kfifo_ptr(__tmp) ? \
                ((typeof(__tmp->type))__kfifo->data) : \
                (__tmp->buf) \
                )[__kfifo->out & __tmp->kfifo.mask]; \
            smp_wmb(); /* finish the copy before advancing out */ \
            __kfifo->out++; \
        } \
    } \
    __ret; \
}) \
)

是不是有点一头雾水了？没关系，我们今天的重点不是讲述环形缓冲区，请看以下伪代码：

// 生产者

put（）

｛

写缓冲区数据；

smp_wmb(); // 写操作内存屏障；

更新写索引

｝

// 消费者

get（）

｛

读缓冲区数据；

smp_wmb(); // 写操作内存屏障；

更新读索引

}

仔细想想，是不是明白为什么不需要加锁了？此例摘自《透过 Linux 内核看无锁编程》（网址：http://www.ibm.com/developerworks/cn/linux/l-cn-lockfree/#icomments ）。当时读那篇文章时感觉有点不对劲，文章下面也有人回复问是不是少了对内存屏障的考虑，于是我翻出 linux-3.7.1 的代码，果然有写操作屏障。

smp_wmb 就是一个写操作内存屏障：在它之前的写操作必须在它之后的写操作之前完成。如果没有这个写操作屏障，由于 CPU 的乱序机制，更新索引的操作可能被提前，那么就会造成缓冲区的混乱了。

实际上在JVM的实现中，内存屏障（barrier）也到处存在，至于会不会被java语言直接使用，就需要我们进一步去了解了，其实熟悉后发现，JAVA距离硬件并不十分遥远，如果有这个需求，JVM完全可以提供这样的机制。

最后附上smp_wmb的在linux中的相关实现

/*
 * Compiler-only barrier: the asm template is empty, so no instruction is
 * emitted; the "memory" clobber makes the compiler treat memory as
 * modified at this point.
 */
#define barrier() __asm__ __volatile__("": : :"memory")

#ifdef CONFIG_X86_32 // 32-bit CPU

#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) // CPUs with the XMM2 feature use mfence; otherwise the lock-prefixed addl is used

#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) // same scheme, with lfence

#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) // same scheme, with sfence selected by X86_FEATURE_XMM

#else

#define mb() asm volatile("mfence":::"memory")

#define rmb() asm volatile("lfence":::"memory")

#define wmb() asm volatile("sfence" ::: "memory")

#endif

#ifdef CONFIG_SMP // symmetric multiprocessing, i.e. more than one CPU/core

#define smp_mb() mb()

#ifdef CONFIG_X86_PPRO_FENCE

# define smp_rmb() rmb()

#else

# define smp_rmb() barrier()

#endif

#ifdef CONFIG_X86_OOSTORE // OOO = out-of-order execution. x86 generally does not need to worry about reordered stores, though reportedly some IA-32 CPU vendors implemented out-of-order stores

# define smp_wmb() wmb()

#else

# define smp_wmb() barrier()

#endif

#define smp_read_barrier_depends() read_barrier_depends()

#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)

#else // single CPU: per the original note there is no reordering problem here, so constraining only the compiler with barrier() is enough

#define smp_mb() barrier()

#define smp_rmb() barrier()

#define smp_wmb() barrier()

#define smp_read_barrier_depends() do { } while (0)

#define set_mb(var, value) do { var = value; barrier(); } while (0)

#endif