1. 多线程访问共享变量的竞争
// This program demonstrates the need for synchronization among
// cooperating threads in a multithreaded application. Several
// threads here will separately increment a shared counter, but
// without any synchronization mechanisms in this initial demo.
//
// compile using: $ g++ concur1.cpp -o concur1
// execute using: $ ./concur1
//
//-------------------------------------------------------------------
#include <stdio.h> // for printf(), perror()
#include <stdlib.h> // for atoi(), malloc(), exit()
#include <sched.h> // for clone()
#include <sys/wait.h> // for wait()
#define FLAGS ( SIGCHLD | CLONE_VM )
#define STACKSIZE 4096
#define N_THREADS 4
int maximum = 2500000; // default number of iterations
int counter = 0; // the shared counter is global
int my_thread( void *dummy )
{
    // Worker: bump the shared global 'counter' a total of 'maximum'
    // times.  Each increment is done as an unprotected three-step
    // read / modify / write sequence, so concurrent workers can load
    // the same stale value and overwrite each other's updates --
    // which is exactly the race this demo sets out to exhibit.
    int done = 0;
    while (done < maximum)
    {
        int snapshot = counter;   // read the shared value
        snapshot += 1;            // bump the private copy
        counter = snapshot;       // write back (may clobber a peer's update)
        done++;
    }
    return 0;
}
int main( int argc, char **argv )
{
    //--------------------------------------------
    // accept an argument that modifies 'maximum'
    //--------------------------------------------
    if ( argc > 1 ) maximum = atoi( argv[1] );
    if ( maximum < 0 ) { fprintf( stderr, "Bad argument\n" ); exit(1); }
    printf( "%d threads will increment a shared counter %d times\n",
            N_THREADS, maximum );
    //-----------------------------------------------------------
    // allocate one contiguous region holding all threads' stacks
    //-----------------------------------------------------------
    char *thread_stacks = (char*)malloc( STACKSIZE * N_THREADS );
    if ( thread_stacks == NULL ) { perror( "malloc" ); exit(1); }
    //---------------------------------------------------------------
    // parent-process activates several worker-threads.  x86 stacks
    // grow downward, so each thread is handed the TOP of its slice
    // ('tos') and grows down into the preceding STACKSIZE bytes.
    //---------------------------------------------------------------
    char *tos = thread_stacks;
    for (int i = 0; i < N_THREADS; i++)
    {
        tos += STACKSIZE;
        // BUGFIX: the original ignored clone()'s return value; a
        // failed clone() would silently leave us short of workers
        // and then hang in the wait() loop below.
        if ( clone( my_thread, tos, FLAGS, NULL ) == -1 )
        { perror( "clone" ); exit(1); }
    }
    //----------------------------------------------------------------
    // parent waits for all worker-threads to finish (SIGCHLD in the
    // clone FLAGS makes each worker reapable via wait())
    //----------------------------------------------------------------
    for (int i = 0; i < N_THREADS; i++) wait( NULL );
    //----------------------------------------------
    // release memory allocated for threads' stacks
    //----------------------------------------------
    free( thread_stacks );
    //--------------------------------------------
    // display the resulting value of the counter
    //--------------------------------------------
    printf( "result: counter=%d\n", counter );
    return 0;
}
输出
l@l-System-Product-Name:~/ll/kernel/sync$ ./count1
4 threads will increment a shared counter 2500000 times
result: counter=3081429
l@l-System-Product-Name:~/ll/kernel/sync$ ./count1
4 threads will increment a shared counter 2500000 times
result: counter=4973504
l@l-System-Product-Name:~/ll/kernel/sync$ ./count1
4 threads will increment a shared counter 2500000 times
result: counter=2539290
l@l-System-Product-Name:~/ll/kernel/sync$ ./count1
4 threads will increment a shared counter 2500000 times
result: counter=4080520
可见结果是不可预测的,这就是由于多个线程同时访问 counter 而没有同步的保护措施
2.clone()
int clone(int (*fn)(void *), void *child_stack, int flags, void *arg);
产生内核级线程
这里fn是函数指针,我们知道进程的4要素,这个就是指向程序的指针,就是所谓的“剧本”, child_stack明显是为子进程分配系统堆栈空间,flags就是标志用来描述你需要从父进程继承哪些资源, arg就是传给子进程的参数。下面是flags可以取的值
标志 含义
CLONE_PARENT 创建的子进程的父进程是调用者的父进程,新进程与创建它的进程成了“兄弟”而不是“父子”
CLONE_FS 子进程与父进程共享相同的文件系统,包括root、当前目录、umask
CLONE_FILES 子进程与父进程共享相同的文件描述符(file descriptor)表
CLONE_NEWNS 在新的namespace启动子进程,namespace描述了进程的文件hierarchy
CLONE_SIGHAND 子进程与父进程共享相同的信号处理(signal handler)表
CLONE_PTRACE 若父进程被trace,子进程也被trace
CLONE_VFORK 父进程被挂起,直至子进程释放虚拟内存资源
CLONE_VM 子进程与父进程运行于相同的内存空间
CLONE_PID 子进程在创建时PID与父进程一致
CLONE_THREAD Linux 2.4中增加以支持POSIX线程标准,子进程与父进程共享相同的线程群
3.引入互斥锁的保护
//-------------------------------------------------------------------
// concur2.cpp
//
// This program revises our 'concur1.cpp' demo, incorporating a
// spinlock that implements 'mutual exclusion' so that only one
// thread at a time accesses the shared 'counter' variable, but
// at the cost of introducing considerable 'busy-waiting' (plus
// bus-contention if executed on a multiprocessor platform).
//
//-------------------------------------------------------------------
#include <stdio.h> // for printf(), perror()
#include <stdlib.h> // for atoi(), malloc(), exit()
#include <sched.h> // for clone()
#include <sys/wait.h> // for wait()
#define FLAGS ( SIGCHLD | CLONE_VM )
#define STACKSIZE 4096
#define N_THREADS 4
int mutex = 1; // lock word: bit 0 == 1 means FREE, bit 0 == 0 means HELD
// if bit 0 is already 0 (lock held): 'btr' leaves CF = 0, so we keep jumping back to 'spin' and retesting
// if bit 0 was 1 (lock free): 'btr' clears it, CF = 1, and we fall through into the critical section
void inline enter_critical( void )
{
//btr 位测试并清0 (把mutex的第0位保存在CF位中,并把指定位(0) 置为0)
//lock for mutiple_processor 锁总线,防止多cpu的并发访问内存(否则,多cpu会导致死锁 另一个置CF = 0)
asm("spin: lock btr $0, mutex ");
//判断CF位,若为1则执行后面的指令;若为0,则跳转
asm(" jnc spin ");
}
// 退出临界区时,mutex = 1
void inline leave_critical( void )
{
//btr 位测试并置1 (把mutex的第0位保存在CF位中,并把指定位(0) 置为1)
asm(" bts $0, mutex ");
}
int maximum = 2500000; // default number of iterations
int counter = 0; // the shared counter is global
int my_thread( void *dummy )
{
    // Worker: perform 'maximum' increments of the shared counter,
    // holding the spinlock around each read-modify-write so the
    // three steps execute as one indivisible unit.
    int done = 0;
    while (done < maximum)
    {
        enter_critical();        // acquire the lock (spins while busy)
        int local = counter;     // read the shared value
        local += 1;              // bump the private copy
        counter = local;         // publish the update
        leave_critical();        // release the lock for the next thread
        done++;
    }
    return 0;
}
int main( int argc, char **argv )
{
    //--------------------------------------------
    // accept an argument that modifies 'maximum'
    //--------------------------------------------
    if ( argc > 1 ) maximum = atoi( argv[1] );
    if ( maximum < 0 ) { fprintf( stderr, "Bad argument\n" ); exit(1); }
    printf( "%d threads will increment a shared counter %d times\n",
            N_THREADS, maximum );
    //-----------------------------------------------------------
    // allocate one contiguous region holding all threads' stacks
    //-----------------------------------------------------------
    char *thread_stacks = (char*)malloc( STACKSIZE * N_THREADS );
    if ( thread_stacks == NULL ) { perror( "malloc" ); exit(1); }
    //---------------------------------------------------------------
    // parent-process activates several worker-threads.  x86 stacks
    // grow downward, so each thread is handed the TOP of its slice
    // ('tos') and grows down into the preceding STACKSIZE bytes.
    //---------------------------------------------------------------
    char *tos = thread_stacks;
    for (int i = 0; i < N_THREADS; i++)
    {
        tos += STACKSIZE;
        // BUGFIX: the original ignored clone()'s return value; a
        // failed clone() would silently leave us short of workers
        // and then hang in the wait() loop below.
        if ( clone( my_thread, tos, FLAGS, NULL ) == -1 )
        { perror( "clone" ); exit(1); }
    }
    //----------------------------------------------------------------
    // parent waits for all worker-threads to finish (SIGCHLD in the
    // clone FLAGS makes each worker reapable via wait())
    //----------------------------------------------------------------
    for (int i = 0; i < N_THREADS; i++) wait( NULL );
    //----------------------------------------------
    // release memory allocated for threads' stacks
    //----------------------------------------------
    free( thread_stacks );
    //--------------------------------------------
    // display the resulting value of the counter
    //--------------------------------------------
    printf( "result: counter=%d\n", counter );
    return 0;
}
输出:
l@l-System-Product-Name:~/ll/kernel/sync$ ./count2
4 threads will increment a shared counter 2500000 times
result: counter=10000000
可见在访问共享的临界区前后引入了互斥变量,得到了保护 ,但要注意的是这里产生了新的问题,就是"忙等"(等不到资源的线程在相当长时间内会一直占有cpu来测试状态)。
另外,注意lock前缀锁住了总线,保证"测试并清位"是一个原子操作;否则在多cpu的情况下,两个cpu可能同时读到锁位为1并同时进入临界区,互斥保护就失效了.
4. 用 nanosleep 系统调用来 yield cpu
//-------------------------------------------------------------------
// yielding.cpp
//
// This program investigates the ability of the 'nanosleep()'
// system-call to allow a Linux task to voluntarily yield its
// control of the processor to another task, as might well be
// preferable to busy-waiting in a multithreaded application.
// The Linux ‘nanosleep()’ system-call allows a thread to ‘yield’ its time-slice
//-------------------------------------------------------------------
#include <stdio.h> // for printf()
#include <stdlib.h> // for exit()
#include <unistd.h> // for fork()
#include <sys/wait.h> // for wait()
#include <time.h> // for nanosleep()
#define N_TASKS 4
int main( int argc, char **argv )
{
    // a 10-nanosecond sleep request (the original comment said
    // "1-nanosecond", but tv_nsec is 10); the kernel rounds the
    // interval up, so the practical effect is simply "yield the
    // CPU for at least one tiny interval"
    struct timespec ts = { 0, 10 };
    // parent forks a series of child-processes
    int taskid = 0;
    for (int i = 0; i < N_TASKS; i++)
    {
        ++taskid;
        // BUGFIX: the original ignored a fork() failure (-1), which
        // the parent would have treated as a child pid and the wait()
        // loop below would then block on a child that never existed.
        pid_t pid = fork();
        if ( pid < 0 ) { perror( "fork" ); exit(1); }
        if ( pid ) continue;   // parent: got the child's pid, keep forking
        // child: fork() returned 0 -- print our id a few times,
        // yielding the processor between prints unless any command
        // line argument was given (so the two schedules can be
        // compared: with yield vs. without)
        for (int j = 0; j < 4; j++)
        {
            printf( "task #%d \n", taskid );
            if ( argc == 1 ) nanosleep( &ts, NULL ); // yield time-slice
        }
        exit(0);
    }
    // parent waits until each child has terminated
    for (int i = 0; i < N_TASKS; i++) wait( NULL );
    printf( "finished\n" );
    return 0;
}
输出:
非 yield
task #1
task #1
task #1
task #1
task #3
task #3
task #3
task #3
task #2
task #2
task #2
task #2
task #4
task #4
task #4
task #4
finished
yield
task #1
task #2
task #1
task #2
task #1
task #3
task #4
task #2
task #1
task #3
task #2
task #3
task #4
task #3
task #4
task #4
finished
可见不让出cpu和让出cpu 用户线程的执行 差异很大,其中
struct timespec
{
time_t tv_sec; //seconds
long tv_nsec; //nanoseconds
};
是一个时间结构体
5. 解决“忙等”问题
// enter_critical, revised to cure busy-waiting: when the lock cannot
// be grabbed, yield the CPU with nanosleep() instead of spinning hot.
// NOTE(review): this snippet relies on a file-scope 'struct timespec
// ts' being in scope, and on the labels spin/pass being emitted only
// once (the function must not be inlined at multiple call-sites).
void inline enter_critical( void )
{
asm("spin: lock btr $0, mutex "); // atomically copy bit 0 of 'mutex' into CF and clear it
asm(" jc pass "); // CF==1: the lock was free and is now ours -- skip the sleep
nanosleep( &ts, NULL ); // <-- system-call inserted here // CF==0: lock held, yield the CPU
asm(" jmp spin ");// after the nap, go back and test the lock again
asm("pass: ");
}