下面的方法更快而且对于cache的压力更小
// Simpler structure
/*
 * A payload guarded by its own spin flag.
 *   spin - lock word: 0 when free, 1 while a thread holds the record.
 *   data - 64-byte payload.
 */
struct record {
    int spin; /* C has no default member initializers; zero this when the record is created */
    char data[64];
};
struct record *node;
while (node->spin || ! __sync_bool_compare_and_swap(&node->spin , 0 , 1)); // spin
memcpy(destination,source,NoOfBytes);
node->spin = 0;
不确定用 CAS 替代对 node->spin 的直接读取是不是能提高一点效率。
下面对使用 mutex 和 spin lock 的情况进行讨论。
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <sched.h>
#include <linux/unistd.h>
#include <sys/syscall.h>
#include <errno.h>
#define INC_TO 1000000 // number of increments each thread performs (one million)
// Shared counter that every worker thread increments without any synchronization.
int global_int = 0;
/* Returns the result of the raw gettid system call for the calling thread. */
pid_t gettid( void )
{
    long tid = syscall( __NR_gettid );

    return tid;
}
/*
 * Worker thread body.
 *
 * arg carries the CPU index as an integer packed into the pointer value.
 * The thread pins itself to that CPU, then increments global_int INC_TO
 * times with no locking. Always returns NULL; if pinning fails it reports
 * the error and returns before touching the counter.
 */
void *thread_routine( void *arg )
{
    int cpu = (int)(long)arg;
    cpu_set_t mask;
    int n;

    CPU_ZERO( &mask );
    CPU_SET( cpu, &mask );

    // Bind this thread to a single core.
    if (sched_setaffinity( gettid(), sizeof mask, &mask ) != 0)
    {
        perror( "sched_setaffinity" );
        return NULL;
    }

    n = 0;
    while (n < INC_TO)
    {
        global_int++;
        // Alternative inline-assembly increments kept for experimentation:
        //__asm__ __volatile__("inc %0" :"=m" (global_int) :"m"(global_int));
        // __asm__ __volatile__("incl %0;" : "=m"(global_int) : );
        n++;
    }

    return NULL;
}
/*
 * Starts one worker thread per online CPU, waits for all of them, then
 * prints the final value of global_int next to the value that would be
 * expected if every increment had taken effect.
 *
 * Returns 0 on completion, -1 if the CPU count cannot be read or the
 * thread array cannot be allocated.
 */
int main( void )
{
    int procs = 0;
    int i;
    pthread_t *thrs;

    // Getting number of CPUs
    procs = (int)sysconf( _SC_NPROCESSORS_ONLN );
    if (procs < 0)
    {
        perror( "sysconf" );
        return -1;
    }

    thrs = malloc( sizeof( pthread_t ) * procs );
    if (thrs == NULL)
    {
        perror( "malloc" );
        return -1;
    }

    printf( "Starting %d threads...\n", procs );

    for (i = 0; i < procs; i++)
    {
        int rc = pthread_create( &thrs[i], NULL, thread_routine, (void *)(long)i );

        if (rc != 0)
        {
            // pthread_create returns the error number instead of setting
            // errno, so hand it to perror explicitly.
            errno = rc;
            perror( "pthread_create" );
            // Only join (and count) the threads that were actually started.
            procs = i;
            break;
        }
    }

    for (i = 0; i < procs; i++)
        pthread_join( thrs[i], NULL );

    free( thrs );

    printf( "After doing all the math, global_int value is: %d\n", global_int );
    printf( "Expected value is: %d\n", INC_TO * procs );

    return 0;
}
Makefile
# Builds the pg binary from main.c.
TARGET = pg
OBJS = main.o
CC = gcc
CFLAGS = -Wall -D_GNU_SOURCE
LDFLAGS = -pthread

# These targets do not name files on disk.
.PHONY: all clean call

all: $(TARGET)

# Recipe lines must begin with a tab character.
$(TARGET): $(OBJS)
	$(CC) $(OBJS) $(LDFLAGS) -o $(TARGET)

%.o: %.c
	$(CC) $(CFLAGS) $< -c -o $@

clean:
	rm -f $(TARGET)
	rm -f $(OBJS)

# Clean, then build everything again.
call: clean all
执行 make all && ./pg
输出
Starting 40 threads...
After doing all the math, global_int value is: 2942375
Expected value is: 40000000
可以看到,即使是对 32 位 int 的加法也不是原子操作,这是因为该过程涉及取值、加法、赋值三个步骤。