本实例是我在学习原子操作时看到的一段很有趣的程序,包含的知识很多。
首先上程序:
// test_atomic.cpp : 定义控制台应用程序的入口点。
//
#define _GNU_SOURCE
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <sched.h>
#include <linux/unistd.h>
#include <sys/syscall.h>
#include <errno.h>
#include<linux/types.h>
#include<time.h>
#include <sys/time.h>
#define INC_TO 1000000 // number of increments each worker thread performs (one million)
/*
 * Read the CPU time-stamp counter with the x86 RDTSC instruction.
 *
 * The instruction's two 32-bit outputs (the "a" and "d" registers) are
 * joined into one 64-bit value: "d" supplies the upper half, "a" the lower.
 */
__u64 rdtsc()
{
    __u32 tsc_low, tsc_high;
    __u64 tsc;

    __asm__ __volatile__("rdtsc" : "=a"(tsc_low), "=d"(tsc_high));

    /* Widen before shifting so the upper half is not lost. */
    tsc = tsc_high;
    tsc <<= 32;
    tsc |= tsc_low;
    return tsc;
}
// Shared counter that every worker thread increments INC_TO times.
int global_int = 0;
// Mutex held around global_int++ in thread_routine2 (the lock-based variant).
pthread_mutex_t count_lock = PTHREAD_MUTEX_INITIALIZER;
/*
 * Return the calling thread's ID as reported by the raw gettid system call.
 */
pid_t gettid( void )
{
    long tid = syscall(__NR_gettid);

    return (pid_t)tid;
}
/*
 * Worker for the atomic-increment benchmark.
 *
 * arg carries the CPU number (passed as an integer cast to a pointer). The
 * thread restricts its affinity to that single CPU, then adds 1 to
 * global_int INC_TO times with __sync_fetch_and_add while measuring the
 * loop with both rdtsc() and gettimeofday(). The measurements are written
 * to stderr.
 *
 * Always returns NULL; if the affinity call fails the error is reported and
 * no increments are performed.
 */
void *thread_routine( void *arg )
{
    const int cpu = (int)(long)arg;
    cpu_set_t mask;
    struct timeval wall_start, wall_stop;
    __u64 tsc_start, tsc_stop;
    __u64 elapsed_us;
    int remaining;

    /* Build an affinity mask containing only the requested CPU. */
    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);
    if (sched_setaffinity(gettid(), sizeof(cpu_set_t), &mask) != 0) {
        perror("sched_setaffinity");
        return NULL;
    }

    /* The cycle counter brackets the wall-clock samples: read first, read last. */
    tsc_start = rdtsc();
    gettimeofday(&wall_start, NULL);

    for (remaining = INC_TO; remaining > 0; remaining--) {
        __sync_fetch_and_add(&global_int, 1);
    }

    gettimeofday(&wall_stop, NULL);
    tsc_stop = rdtsc();

    /* Wall-clock difference expressed in microseconds. */
    elapsed_us = (wall_stop.tv_sec - wall_start.tv_sec) * 1000000 + (wall_stop.tv_usec - wall_start.tv_usec);

    fprintf(stderr,
            "proc_num :%d,__sync_fetch_and_add cost %llu CPU cycle,cost %llu us\n",
            cpu, tsc_stop - tsc_start, elapsed_us);
    return NULL;
}
/*
 * Worker for the mutex-protected benchmark.
 *
 * arg carries the CPU number (passed as an integer cast to a pointer). The
 * thread restricts its affinity to that single CPU, then increments
 * global_int INC_TO times, taking count_lock around every increment, while
 * measuring the loop with both rdtsc() and gettimeofday(). The measurements
 * are written to stderr.
 *
 * Always returns NULL; if the affinity call fails the error is reported and
 * no increments are performed.
 */
void *thread_routine2( void *arg )
{
    const int cpu = (int)(long)arg;
    cpu_set_t mask;
    struct timeval wall_start, wall_stop;
    __u64 tsc_start, tsc_stop;
    __u64 elapsed_us;
    int remaining;

    /* Build an affinity mask containing only the requested CPU. */
    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);
    if (sched_setaffinity(gettid(), sizeof(cpu_set_t), &mask) != 0) {
        perror("sched_setaffinity");
        return NULL;
    }

    /* The cycle counter brackets the wall-clock samples: read first, read last. */
    tsc_start = rdtsc();
    gettimeofday(&wall_start, NULL);

    for (remaining = INC_TO; remaining > 0; remaining--) {
        pthread_mutex_lock(&count_lock);
        global_int++;
        pthread_mutex_unlock(&count_lock);
    }

    gettimeofday(&wall_stop, NULL);
    tsc_stop = rdtsc();

    /* Wall-clock difference expressed in microseconds. */
    elapsed_us = (wall_stop.tv_sec - wall_start.tv_sec) * 1000000 +(wall_stop.tv_usec - wall_start.tv_usec);

    fprintf(stderr,
            "proc_num :%d,pthread lock cost %llu CPU cycle,cost %llu us\n",
            cpu, tsc_stop - tsc_start, elapsed_us);
    return NULL;
}
/*
 * Program entry point: start one thread_routine2 worker per online CPU, wait
 * for all of them, then print the final value of global_int next to the
 * expected total (INC_TO multiplied by the number of threads started).
 *
 * Each worker receives its index (0 .. procs-1) as its argument.
 *
 * Returns 0 once the started threads have been joined (even if only some of
 * them could be created), or -1 if the CPU count cannot be determined or the
 * thread-handle array cannot be allocated.
 */
int main()
{
    int procs;
    int i;
    int rc;
    pthread_t *thrs;

    /*
     * Getting number of CPUs. errno is cleared first so that a non-positive
     * result that did not set errno is not reported with a stale message.
     */
    errno = 0;
    procs = (int)sysconf(_SC_NPROCESSORS_ONLN);
    if (procs < 1) {
        if (errno != 0)
            perror("sysconf");
        else
            fprintf(stderr, "sysconf: no online processors reported\n");
        return -1;
    }

    /* Cast kept because the file header names this source test_atomic.cpp. */
    thrs = (pthread_t *)malloc(sizeof(*thrs) * (size_t)procs);
    if (thrs == NULL) {
        perror("malloc");
        return -1;
    }

    printf("Starting %d threads...\n", procs);
    for (i = 0; i < procs; i++) {
        rc = pthread_create(&thrs[i], NULL, thread_routine2, (void *)(long)i);
        if (rc != 0) {
            /*
             * pthread_create() reports failure through its return value and
             * does not set errno, so hand the code to perror() explicitly.
             */
            errno = rc;
            perror("pthread_create");
            /* Only the threads created so far are joined and counted. */
            procs = i;
            break;
        }
    }

    for (i = 0; i < procs; i++) {
        pthread_join(thrs[i], NULL);
    }
    free(thrs);

    printf("After doing all the math, global_int value is: %d\n", global_int);
    printf("Expected value is: %d\n", INC_TO * procs);
    return 0;
}
该段程序主要是为了测试分别使用互斥量和原子操作对全局变量进行自增时,对系统性能的影响。
指标:通过 CPU 周期计数和耗时来衡量
[root@10-4-23-15 wcl]# ./test_atomic
Starting 2 threads...
proc_num :1,__sync_fetch_and_add cost 184322840 CPU cycle,cost 70891 us
proc_num :0,__sync_fetch_and_add cost 198164962 CPU cycle,cost 76216 us
After doing all the math, global_int value is: 2000000
Expected value is: 2000000
2、完成对全局变量的自增操作后,所需要的时间
对全局变量的操作方式:每个 CPU 内核绑定一个线程,各线程同时对同一个全局变量进行操作。
其中一个方法使用
__sync_fetch_and_add 原子操作,另一个使用互斥量来保护对全局变量的自增。
cpu_set_t set;
CPU_ZERO( &set );
CPU_SET( proc_num, &set );
设置线程的 CPU 掩码,将线程一一对应到 CPU 的各个核上。
sched_setaffinity
使线程绑定到具体的 CPU 核上。
如何获取cpu核心数:
procs = (int)sysconf( _SC_NPROCESSORS_ONLN );
有几个 CPU 核就启动几个线程,并一一对应绑定。
获取线程ID:
syscall( __NR_gettid );
所以,最终通过 sched_setaffinity 把具体的线程绑定到指定的 CPU 核上。
测试结果:
[root@10-4-23-15 wcl]# ./test_atomic
Starting 2 threads...
proc_num :1,__sync_fetch_and_add cost 184322840 CPU cycle,cost 70891 us
proc_num :0,__sync_fetch_and_add cost 198164962 CPU cycle,cost 76216 us
After doing all the math, global_int value is: 2000000
Expected value is: 2000000
[root@10-4-23-15 wcl]# ./test_atomic
Starting 2 threads...
proc_num :0,pthread lock cost 492937699 CPU cycle,cost 189598 us
proc_num :1,pthread lock cost 494258362 CPU cycle,cost 190106 us
After doing all the math, global_int value is: 2000000
Expected value is: 2000000
通过测试结果,我们可以看到,互斥量的开销明显高于原子操作:按上面列出的数据计算,互斥量消耗的 CPU 周期和时间约为原子操作的 2.5~2.7 倍(原文估计为三四倍左右)。
更多文章,请访问:http://blog.csdn.net/wallwind