dpdk的ring

struct rte_ring {
	TAILQ_ENTRY(rte_ring) next;      /**< Next in list. */

	char name[RTE_RING_NAMESIZE];    /**< Name of the ring. */
	int flags;                       /**< Flags supplied at creation. */

	/** Ring producer status. */
	struct prod {
		uint32_t watermark;      /**< Maximum items before EDQUOT. */
		uint32_t sp_enqueue;     /**< True, if single producer. */
		uint32_t size;           /**< Size of ring. */
		uint32_t mask;           /**< Mask (size-1) of ring. */
		volatile uint32_t head;  /**< Producer head. */
		volatile uint32_t tail;  /**< Producer tail. */
	} prod __rte_cache_aligned;

	/** Ring consumer status. */
	struct cons {
		uint32_t sc_dequeue;     /**< True, if single consumer. */
		uint32_t size;           /**< Size of the ring. */
		uint32_t mask;           /**< Mask (size-1) of ring. */
		volatile uint32_t head;  /**< Consumer head. */
		volatile uint32_t tail;  /**< Consumer tail. */
#ifdef RTE_RING_SPLIT_PROD_CONS
	} cons __rte_cache_aligned;
#else
	} cons;
#endif

#ifdef RTE_LIBRTE_RING_DEBUG
	struct rte_ring_debug_stats stats[RTE_MAX_LCORE];
#endif

	void * ring[0] __rte_cache_aligned; /**< Memory space of ring starts here.
	 	 	 	 	 	 	 	 	 	 * not volatile so need to be careful
	 	 	 	 	 	 	 	 	 	 * about compiler re-ordering */
};

dpdk实现的multi-enque和multi-deque比起常规的ring性能具体优越在哪里呢?

常规操作:基本上是需要对整个que加锁的,或者对que的head和tail分别加锁


dpdk的操作:

static inline int __attribute__((always_inline))
__rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table,
			 unsigned n, enum rte_ring_queue_behavior behavior)
{
	uint32_t prod_head, prod_next;
	uint32_t cons_tail, free_entries;
	const unsigned max = n;
	int success;
	unsigned i;
	uint32_t mask = r->prod.mask;
	int ret;

	/* move prod.head atomically */
	do {
		/* Reset n to the initial burst count */
		n = max;

		prod_head = r->prod.head;
		cons_tail = r->cons.tail;
		/* The subtraction is done between two unsigned 32bits value
		 * (the result is always modulo 32 bits even if we have
		 * prod_head > cons_tail). So 'free_entries' is always between 0
		 * and size(ring)-1. */
		free_entries = (mask + cons_tail - prod_head);

		/* check that we have enough room in ring */
		if (unlikely(n > free_entries)) {
			if (behavior == RTE_RING_QUEUE_FIXED) {
				__RING_STAT_ADD(r, enq_fail, n);
				return -ENOBUFS;
			}
			else {
				/* No free entry available */
				if (unlikely(free_entries == 0)) {
					__RING_STAT_ADD(r, enq_fail, n);
					return 0;
				}

				n = free_entries;
			}
		}

		prod_next = prod_head + n;
		success = rte_atomic32_cmpset(&r->prod.head, prod_head,
					      prod_next);
	} while (unlikely(success == 0));

	/* write entries in ring */
	ENQUEUE_PTRS();
	rte_compiler_barrier();

	/* if we exceed the watermark */
	if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
		ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT :
				(int)(n | RTE_RING_QUOT_EXCEED);
		__RING_STAT_ADD(r, enq_quota, n);
	}
	else {
		ret = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : n;
		__RING_STAT_ADD(r, enq_success, n);
	}

	/*
	 * If there are other enqueues in progress that preceeded us,
	 * we need to wait for them to complete
	 */
	while (unlikely(r->prod.tail != prod_head))
		rte_pause();

	r->prod.tail = prod_next;
	return ret;
}
拥有prod.head和prod.tail,cons.head和cons.tail,在enque的时候,比较prod.head和cons.tail,来确认是否ring已满。

在多个线程同时enque时,通过compare and set(CAS)来确认同步性。在第一个while循环结束后,两个线程的prod_head和prod_next都是不同的,接下来就可以真正的塞入ring的数组中。

而第二个循环:
while (unlikely(r->prod.tail != prod_head))
		rte_pause();

则是证明了这个数据已经确实塞入数组中,待deque的时候则比较cons.head和prod.tail,不能比较prod.head(因为head只是抢到了数组的位置,数据还没有真实写入),这也是为什么dpdk ring是prod.tail 和 prod.head 两个的原因。

比较下dpdk ring和常规multi ring的区别:

锁的粒度更小了,只对head,tail这些值做同步,对于数据真正塞入ring中其实并没有锁同步。

so,可以改造常规的ring实现,也有prod.head和prod.tail, 做同步时仅对head同步,各个线程获得当前的prod_head和prod_next,然后再进行真正的入队列,再更新prod.tail,当然这就和dpdk ring 一样了。。。。简单一点的head同步可以用锁,这样锁的范围小很多。。。




  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值