September 4, 2012
Lock-free Queue Showdown
Someone threw down a challenge on Weibo that looked rather formidable; they even set up a dedicated website for it:
http://coderpk.com/
I have always been interested in this kind of problem, so I gave it a shot with my own approach.
My first thought was that designing a pure multi-reader/multi-writer lock-free queue myself would be a thankless effort (the challenger has surely implemented exactly that algorithm), and getting it right would take considerable time. So I reframed the problem: to beat him, reducing contention is essential. Atomic operations already keep contention fairly low, but they still carry a real time cost; the goal is to have no atomics at all, or as few as possible.

My first instinct was partitioning: split one complete queue into N pieces, where every piece is a 1-reader/1-writer queue, then wrap a layer on top, so that from the outside it looks like a single queue while internally it is N queues. The effect is significant: roughly 2-3x faster.
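The whole trick rests on one property: in a 1-reader/1-writer ring buffer, head is only ever written by the consumer and tail only by the producer, so no lock is needed on the hot path at all. The program below leans on x86's strong memory ordering for this. As a portable illustration of the same core idea (a minimal sketch of my own, assuming C++11; the names spsc_ring, try_push and try_pop are not from the contest code), the indices would be made atomic with acquire/release ordering:

#include <atomic>
#include <cstddef>

// One single-producer/single-consumer slice. N must be a power of two
// so that the (N - 1) mask can replace the modulo.
template <typename T, size_t N>
struct spsc_ring {
    T buf[N];
    std::atomic<size_t> head{0}; // written only by the consumer
    std::atomic<size_t> tail{0}; // written only by the producer

    bool try_push(const T& v) {
        size_t t = tail.load(std::memory_order_relaxed);
        if (((t + 1) & (N - 1)) == head.load(std::memory_order_acquire))
            return false; // full: one slot is kept empty on purpose
        buf[t] = v;
        tail.store((t + 1) & (N - 1), std::memory_order_release);
        return true;
    }

    bool try_pop(T& out) {
        size_t h = head.load(std::memory_order_relaxed);
        if (h == tail.load(std::memory_order_acquire))
            return false; // empty
        out = buf[h];
        head.store((h + 1) & (N - 1), std::memory_order_release);
        return true;
    }
};

The full test program follows.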
#include <stdio.h>
#include <pthread.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include "time_count.h" // provides rdtsc()

const int count_per_thread_push = 100000000;

const float CPU_MHZ = 1596; // use cat /proc/cpuinfo to get the value
const float CPU_tick_count_per_second = CPU_MHZ * 1000 * 1000;

#define RING_BUFF_SIZE 65536
#define RING_BUFF_NUM 10

// Each thread remembers which sub-queue it has claimed.
static __thread int __thread_id__ = -1;

typedef struct RingBuffer {
    unsigned long long *buffer;
    int head; // read position, written only by the consumer
    int tail; // write position, written only by the producer
    int size;
    int po; // 1 once a consumer has claimed this sub-queue
    int pu; // 1 once a producer has claimed this sub-queue
    pthread_mutex_t push_used; // guards pu during claiming
    pthread_mutex_t pop_used;  // guards po during claiming
} RingBuffer;

inline bool init_rb(struct RingBuffer* rb, int size) {
    rb->buffer = (unsigned long long *) malloc(sizeof(unsigned long long) * (size + 1));
    if (rb->buffer == NULL) {
        return false;
    }
    rb->head = 0;
    rb->tail = 0;
    rb->po = 0;
    rb->pu = 0;
    pthread_mutex_init(&(rb->push_used), NULL);
    pthread_mutex_init(&(rb->pop_used), NULL);
    rb->size = size + 1; // one slot stays empty to tell "full" from "empty"

    return true;
}

inline void free_rb_buff(struct RingBuffer* rb) {
    if (rb) {
        if (rb->buffer) {
            free(rb->buffer);
        }
        pthread_mutex_destroy(&(rb->push_used));
        pthread_mutex_destroy(&(rb->pop_used));
    }
}

// Single-producer push: only one thread ever writes tail, so no lock is needed.
inline bool rb_push(struct RingBuffer* rb, unsigned long long number) {
    if ((rb->tail + 1) % rb->size == rb->head) {
        return false; // ring is full
    }
    rb->buffer[rb->tail] = number;
    rb->tail = (rb->tail + 1) % rb->size;
    return true;
}

// Single-consumer pop: only one thread ever writes head. Returns true while
// the consumer should keep polling; popping the 0 sentinel returns false.
inline bool rb_pop(struct RingBuffer* rb) {
    if (rb->tail == rb->head) {
        return true; // empty: nothing to pop, keep spinning
    }
    bool ret = (rb->buffer[rb->head] != 0);
    rb->head = (rb->head + 1) % rb->size;
    return ret;
}

struct lock_free_queue {
    void push(unsigned long long pop_time)
    {
        // First call in this thread: claim a dedicated sub-queue. The mutex
        // is taken only during claiming, never on the hot path. The start
        // index is derived from the timestamp to spread producers out.
        while (__thread_id__ == -1) {
            for (int i = pop_time % RING_BUFF_NUM; i < RING_BUFF_NUM; i = (i + 1) % RING_BUFF_NUM) {
                pthread_mutex_lock(&(rb[i].push_used));
                if (rb[i].pu == 0) {
                    rb[i].pu = 1;
                    __thread_id__ = i;
                    pthread_mutex_unlock(&(rb[i].push_used));
                    break;
                }
                pthread_mutex_unlock(&(rb[i].push_used));
            }
        }
        // Spin until our sub-queue has room.
        while (!rb_push(rb + __thread_id__, pop_time));
    };

    bool pop()
    {
        // First call in this thread: claim a dedicated sub-queue.
        while (__thread_id__ == -1) {
            for (int i = 0; i < RING_BUFF_NUM; i++) {
                pthread_mutex_lock(&(rb[i].pop_used));
                if (rb[i].po == 0) {
                    rb[i].po = 1;
                    __thread_id__ = i;
                    pthread_mutex_unlock(&(rb[i].pop_used));
                    break;
                }
                pthread_mutex_unlock(&(rb[i].pop_used));
            }
        }

        // if (now % 1000000 == 0) {
        //     printf("task get:%u,task write:%u,latency:%lf\n",
        //            m_head[now], rdtsc(), (rdtsc() - m_head[now]) / CPU_tick_count_per_second);
        // }

        return rb_pop(rb + __thread_id__);
    };

    lock_free_queue()
    {
        for (int i = 0; i < RING_BUFF_NUM; i++) {
            init_rb(rb + i, RING_BUFF_SIZE);
        }
    };

    ~lock_free_queue()
    {
        for (int i = 0; i < RING_BUFF_NUM; i++) {
            free_rb_buff(rb + i);
        }
    }

    struct RingBuffer rb[RING_BUFF_NUM];
};

void* pop(void* queue)
{
    lock_free_queue* lfq = (lock_free_queue*)queue;
    do {
    } while (lfq->pop()); // spin until the 0 sentinel arrives

    return NULL;
}

void* push(void* queue)
{
    lock_free_queue* lfq = (lock_free_queue*)queue;
    for (int i = 0; i < count_per_thread_push / 4; ++i)
    {
        unsigned long long now = rdtsc();
        lfq->push(now); // the loop is unrolled: four pushes per iteration
        lfq->push(now);
        lfq->push(now);
        lfq->push(now);
    }

    // new: push a 0 sentinel to tell the consumer on this sub-queue to stop
    lfq->push(0);
    return NULL;
}

// void* push_end(void* queue)
// {
//     lock_free_queue* lfq = (lock_free_queue*)queue;
//     for (int i = 0; i < 1000; ++i)
//     {
//         lfq->push(0);
//     }
// }

int main(void)
{
    pthread_t* thread_pop = (pthread_t*) malloc(10 * sizeof(pthread_t));
    pthread_t* thread_push = (pthread_t*) malloc(10 * sizeof(pthread_t));
    pthread_t* thread_push_end = (pthread_t*) malloc(sizeof(pthread_t));
    lock_free_queue lfq;

    for (int i = 0; i < 10; ++i)
    {
        pthread_create(&thread_push[i], NULL, push, &lfq);
    }

    for (int i = 0; i < 10; ++i)
    {
        pthread_create(&thread_pop[i], NULL, pop, &lfq);
    }

    for (int i = 0; i < 10; ++i) // wait for the producers to finish
    {
        pthread_join(thread_push[i], NULL);
    }

    // pthread_create(thread_push_end, NULL, push_end, &lfq); // push end signal

    for (int i = 0; i < 10; ++i) // wait for the consumers to quit
    {
        pthread_join(thread_pop[i], NULL);
    }

    if (NULL != thread_pop)
    {
        free(thread_pop);
        thread_pop = NULL;
    }
    if (NULL != thread_push)
    {
        free(thread_push);
        thread_push = NULL;
    }
    if (NULL != thread_push_end)
    {
        free(thread_push_end);
        thread_push_end = NULL;
    }
}
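For reference, this is how one might build and run the test (a sketch, assuming the source is saved as lfq.cpp and that time_count.h, which supplies rdtsc(), is in the include path):

g++ -O2 -pthread lfq.cpp -o lfq
./lfq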
Comments (2)
Genesislive
January 30, 2013
9:42 pm
This 1-reader/1-writer sub-queue scheme isn't really general, is it? The number of reading and writing threads (objects) is not equal in every situation.
livexmm
March 15, 2013
1:52 pm
Yes, it certainly has significant limitations, but for the challenge's particular problem it does work.