#include <stdint.h>
#include <time.h>
#include <pthread.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
namespace detail {

/**
 * Back-off helper for busy-wait loops.
 *
 * The first kMaxActiveSpin calls to wait() execute a single `pause`
 * instruction each; every call after that sleeps for 500000 ns via
 * nanosleep() instead of spinning.
 */
class Sleeper {
    // Number of wait() calls that spin before wait() starts sleeping.
    static const uint32_t kMaxActiveSpin = 4000;

    // How many spinning wait() calls this instance has made so far.
    uint32_t spinCount;

public:
    Sleeper() : spinCount(0) {}

    /**
     * Performs one back-off step: a `pause` while the spin budget lasts,
     * a 0.5 ms nanosleep() once it is used up.
     */
    void wait() {
        if (spinCount >= kMaxActiveSpin) {
            // Spin budget exhausted: yield the CPU for 500000 ns.
            struct timespec backoff = { 0, 500000 };
            nanosleep(&backoff, NULL);
            return;
        }

        ++spinCount;
        asm volatile("pause");
    }
};

}  // namespace detail
/**
 * Spin lock whose entire state is a single byte.
 *
 * The struct has no constructor, so it remains a plain aggregate; call
 * init() once before the lock is used.
 *
 * All concurrent accesses to lock_ go through the compiler's __atomic
 * builtins. They replace hand-written `lock cmpxchgb` inline asm: cmpxchg
 * overwrites %al when the comparison fails, which an input-only "a"
 * constraint does not tell the compiler, and plain reads/writes of lock_
 * from several threads are a data race.
 *
 * Experiment note: to compare against a 64-bit lock word, change lock_ and
 * the cas() parameters to uint64_t; the builtins are type-generic.
 */
struct MicroSpinLock {
    enum { FREE = 0, LOCKED = 1 };

    // Current state of the lock: FREE or LOCKED.
    uint8_t lock_;

    /**
     * Atomically stores newVal into lock_ if lock_ currently equals compare.
     *
     * @param compare value lock_ is expected to hold
     * @param newVal  value to store when the expectation is met
     * @return true if the store happened, false if lock_ held another value
     */
    bool cas(uint8_t compare, uint8_t newVal) {
        // Strong CAS with sequentially consistent ordering, matching the
        // full-barrier behaviour of a lock-prefixed cmpxchg. On failure the
        // builtin writes the observed value into the local `compare` copy,
        // which is simply discarded.
        return __atomic_compare_exchange_n(&lock_, &compare, newVal,
                                           false /* strong */,
                                           __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }

    /**
     * Puts the lock into the FREE state.
     *
     * This is a plain, unsynchronized store: call it before any other
     * thread can touch the lock.
     */
    void init() {
        lock_ = FREE;
    }

    /**
     * Makes a single attempt to take the lock.
     *
     * @return true if the lock was FREE and is now held by the caller
     */
    bool try_lock() {
        return cas(FREE, LOCKED);
    }

    /**
     * Blocks until the lock has been acquired.
     *
     * Waits with read-only loads (backing off through detail::Sleeper) until
     * the lock looks FREE, and only then attempts the atomic swap; if another
     * thread wins the swap, waiting resumes.
     */
    void lock() {
        detail::Sleeper sleeper;
        do {
            // The atomic load forces a fresh read of lock_ on every pass.
            while (__atomic_load_n(&lock_, __ATOMIC_RELAXED) != FREE) {
                sleeper.wait();
            }
        } while (!try_lock());
    }

    /**
     * Releases the lock.
     *
     * The release store keeps the writes made while holding the lock from
     * being reordered after the store that frees it.
     */
    void unlock() {
        __atomic_store_n(&lock_, static_cast<uint8_t>(FREE), __ATOMIC_RELEASE);
    }
};
// Shared counter incremented by every worker thread in sum().
// NOTE(review): the 64-byte alignment presumably gives the counter its own
// cache line (assumes 64-byte lines) — confirm for the target CPU.
size_t __attribute__((aligned(64))) g_uCount = 0;
// Global spin lock taken around each increment in the SMALL_LOCK build;
// main() calls init() on it before starting the threads. It is aligned to
// 64 bytes as well, so it starts on a different 64-byte boundary than
// g_uCount.
MicroSpinLock __attribute__((aligned(64))) locker;
/**
 * Thread entry point: increments the global counter g_uCount 250000 times.
 *
 * The build selects how the increment is done:
 *   - NO_LOCK:    plain, unsynchronized `g_uCount++`.
 *   - SMALL_LOCK: each increment is wrapped in locker.lock()/unlock().
 * If neither macro is defined the function does no work; if both are
 * defined, both loops run.
 *
 * The pthread argument is unused.
 *
 * @return always NULL. The explicit return matters: flowing off the end of
 *         a non-void function is undefined behaviour in C++.
 */
void* sum(void*)
{
#ifdef NO_LOCK
    // Deliberately unsynchronized; NOTE(review): presumably the racy
    // baseline for the experiment — updates can be lost here.
    for (int i = 0; i < 250000; ++i)
        g_uCount++;
#endif
#ifdef SMALL_LOCK
    for (int i = 0; i < 250000; ++i)
    {
        locker.lock();
        g_uCount++;
        locker.unlock();
    }
#endif
    return NULL;
}
/**
 * Runs the experiment: initializes the global lock, starts kThreadCount
 * threads that each execute sum(), and waits for all of them to finish.
 *
 * @return 0 on success, 1 if a thread could not be created or joined.
 */
int main(void)
{
    enum { kThreadCount = 10 };

    // Must happen before any worker can call lock().
    locker.init();

    // Fixed-size array on the stack: no allocation that could fail or leak.
    pthread_t workers[kThreadCount];
    int started = 0;
    int exit_code = 0;

    for (; started < kThreadCount; ++started) {
        const int err = pthread_create(&workers[started], NULL, sum, NULL);
        if (err != 0) {
            fprintf(stderr, "pthread_create failed: %s\n", strerror(err));
            exit_code = 1;
            break;
        }
    }

    // Join only the threads that were actually started; joining an
    // uninitialized pthread_t is undefined.
    for (int i = 0; i < started; ++i) {
        const int err = pthread_join(workers[i], NULL);
        if (err != 0) {
            fprintf(stderr, "pthread_join failed: %s\n", strerror(err));
            exit_code = 1;
        }
    }

    // Uncomment to print the final count (10 * 250000 when every increment
    // is kept). g_uCount is a size_t, so the format must be %zu.
    //printf("g_uCount:%zu\n", g_uCount);
    return exit_code;
}
以上是实验代码,结果比较令人满意,这里不再细说。一个 lock 只需要 8 个 bit,相当不错;与 4 字节的 lock 对比,也没有发现明显问题。本博客留给自己和感兴趣的朋友们。