pthread库是linux系统的多线程库。上次写完longjmp和多线程的例子。就想用系统的真线程库改写一下读写线程的例子来和大家分享。真线程库应该比自己写的功能更完善,代码也更容易写吧!… …
说到这里,我不知道接下来该说什么了……也许是自己对多线程库掌握得不够熟练吧!代码早就改好了,运行时多数时候是好的,但有的时候却会crash。我确信这是同步的问题,查了好多天,除了费解还是费解,甚至怀疑是天气太热,机器内存里的数据发生了突变。直到刚才改了一处,才稳定下来,目前没发现再crash,这就把代码分享上来。这次被这个bug折腾得代码已经有些过度保守了,不像是我自己的风格,但仍不敢保证没有错误。万一谁在调试中发现了新的错误,请淡定,在这个基础上改就好。
现在的数据结构变成这样:
/*
 * Shared state for the hand-written scheduler (run by main) and the two
 * worker threads.
 */
typedef struct _Context_ {
/* Taken by the running worker in lock_scheduler() and released in yield();
 * main blocks on it after resume() until the worker yields. */
pthread_mutex_t sched;
/* Set to 1 by resume(); cleared by lock_scheduler() once the worker holds
 * `sched`. resume() spins until it reads 0. */
volatile int wait_thread_lock_sched;
/* Set to 1 by bx_read() when the buffer is empty and the end-of-data flag
 * is set. */
int finish;
struct {
/* Per-worker mutex: locked by main before the workers start; a worker
 * blocks on it until resume() unlocks it. */
pthread_mutex_t mutex;
/* Handshake flag: 999 at worker start-up, 1 in resume(), -1 in yield(),
 * 0 once the corresponding step has completed. */
volatile int wait_sched_lock_thread;
/* Incremented by yield() each time it finds the flag above still non-zero. */
int pend;
/* 0 while the worker still has work to do; set to 1 when it is finished. */
int state;
} thread[2];
} Context;
/* Indices into Context.thread[]. */
#define WRITE_THREAD 0
#define READ_THREAD 1
这里用mutex做同步。mutex本来适合做互斥:在同一线程中上锁、解锁,保护对共享资源的访问。理论上它也可以用来做同步,但做同步需要在不同线程中分别上锁和解锁,实际用下来感觉并不理想。
使用pthread线程库时,要注意先测试一下,这一步不能省。我这个版本的文档说,没有用attr指定类型的mutex默认创建为普通锁。这可不一定,测试下来发现给的是递归锁。两者的区别在于:锁定一个已经锁定的mutex时,如果上次是自己锁的,普通锁会休眠,而递归锁会成功。
普通用法是在同一线程中对资源上锁、解锁,操作是串行的,不会发生自己两次锁同一个资源的问题,这时两者的作用是相同的,除非操作中用了递归函数。可能是因为这个原因,我的系统中默认给了递归锁。
但是在同步的用法中,上锁和解锁在2个不同的线程里,而系统调度是随机的:有可能上锁的线程被调度了2次,而解锁的线程一次也没被调度。这样,如果用的是递归锁,它每次上锁都会成功,它不会停下来,还会继续上锁。这就会引发复杂的问题。
回到这里的代码:一个调度程序和2个线程,mutex设置的是普通锁。注意系统的调度仍然在跑,这里说的是自己写的同步调度,以下简称调度,指的都是自己做的调度。如果线程自己的mutex已被别的线程上锁,那么线程运行到这个位置再次上锁时就会休眠;而别的线程一解锁,也就唤醒了这个休眠的线程。这样就有了同步的效果。注意休眠线程醒来之后已经获得了mutex的锁,它可能需要立即解锁,以便别的线程继续控制这把锁。
初始化:
/* Create every mutex explicitly as PTHREAD_MUTEX_NORMAL instead of relying
 * on the platform's default mutex type. */
pthread_mutexattr_t mat;
pthread_mutexattr_init(&mat);
pthread_mutexattr_settype(&mat, PTHREAD_MUTEX_NORMAL);
pthread_mutex_init(&bx_mutex, &mat);
pthread_mutex_init(&gCtx.sched, &mat);
pthread_mutex_init(&gCtx.thread[0].mutex, &mat);
pthread_mutex_init(&gCtx.thread[1].mutex, &mat);
/* Main takes both worker mutexes before the workers exist, so each worker
 * blocks on its own mutex until resume() unlocks it. */
mutex_lock(&gCtx.thread[0].mutex);
mutex_lock(&gCtx.thread[1].mutex);
/* Writer is started first, then the reader. */
start_coroutine((pf) coroutine_write, NULL);
start_coroutine((pf) coroutine_read, NULL);
start_coroutine之后,读写线程有可能已经跑起来了,有可能还没跑,这不一定。之前调度程序已经给它们mutex上了锁,跑起来之后,首先是去对自己的mutex上锁,这样可以进入休眠,让调度程序有序唤醒它们进行操作。
/*
 * Writer thread body: waits to be resumed by the scheduler, then reads
 * "b.c" line by line and pushes each line (trailing "\n" / "\r" removed)
 * into the shared buffer with bx_write().
 *
 * arg: value handed over by pthread_create(); it is overwritten before use.
 */
void coroutine_write(void *arg)
{
FILE *fp;
char s[128];
int len;
int i = (int) arg;
/* The value taken from arg is discarded; this thread is always the writer. */
i = WRITE_THREAD;
/* Tell main (which spins while this flag is 0) that the thread has started. */
gCtx.thread[i].wait_sched_lock_thread = 999;
/* Block until resume() unlocks our mutex, then release it again. */
mutex_lock(&gCtx.thread[i].mutex);
mutex_unlock(&gCtx.thread[i].mutex);
/* Take the scheduler mutex so main blocks until we yield. */
lock_scheduler();
/* Wait until resume() has finished and reset the handshake flag. */
while (gCtx.thread[i].wait_sched_lock_thread != 0) {
sched_yield();
}
fp = fopen("b.c", "r");
if (!fp) {
printf("can not open `b.c'\n");
exit(0);
}
mutex_lock(&bx_mutex);
while (fgets(s, 128, fp) != NULL) {
len = strlen(s);
/* Drop one trailing '\n', if present. */
if (len) {
--len;
if (s[len] == '\n') s[len] = '\0'; else ++len;
}
/* Drop one trailing '\r', if present. */
if (len) {
--len;
if (s[len] == '\r') s[len] = '\0'; else ++len;
}
/* len + 1 counts the terminating NUL. */
bx_write(s, len + 1);
}
/* Signal end of data and mark the writer as finished. */
bx.eobf = 1;
gCtx.thread[WRITE_THREAD].state = 1;
fclose(fp);
printf("[WRITE_THREAD FINISHED]\n");
report();
/* Hand control back to the scheduler. */
yield(WRITE_THREAD);
}
/*
 * Reader thread body: waits to be resumed by the scheduler, then pulls
 * lines out of the shared buffer with bx_read() and prints them until the
 * writer has signalled end of data and the buffer is empty.
 *
 * arg: value handed over by pthread_create(); it is overwritten before use.
 */
void coroutine_read(void *arg)
{
char buf[128];
int i = (int) arg;
/* The value taken from arg is discarded; this thread is always the reader. */
i = READ_THREAD;
/* Tell main (which spins while this flag is 0) that the thread has started. */
gCtx.thread[i].wait_sched_lock_thread = 999;
/* Block until resume() unlocks our mutex, then release it again. */
mutex_lock(&gCtx.thread[i].mutex);
mutex_unlock(&gCtx.thread[i].mutex);
/* Take the scheduler mutex so main blocks until we yield. */
lock_scheduler();
/* Wait until resume() has finished and reset the handshake flag. */
while (gCtx.thread[i].wait_sched_lock_thread != 0) {
sched_yield();
}
mutex_lock(&bx_mutex);
/* Keep reading while the writer is not done or lines are still buffered. */
while (!bx.eobf || bx.bc) {
bx_read(buf, 128);
printf("%s\n", buf);
}
gCtx.thread[READ_THREAD].state = 1;
printf("[READ_THREAD FINISHED]\n");
report();
/* Hand control back to the scheduler. */
yield(READ_THREAD);
}
线程被调度程序唤醒之后,立即用lock_scheduler()对调度程序的mutex上锁,让调度程序完成调度语义之后可以在这个锁上休眠,等候被调线程完成操作后重新唤醒它。
/*
 * Called by a worker right after it has been woken: takes the scheduler
 * mutex and then clears the flag that resume() is spinning on.
 */
void lock_scheduler()
{
mutex_lock(&gCtx.sched);
/* Only cleared after the lock is held, so resume() continues afterwards. */
gCtx.wait_thread_lock_sched = 0;
}
调度程序用resume()唤醒读写线程中的一个,唤醒之后跟它交换mutex的锁定。最后,调度程序的mutex被读写线程锁定,读写线程的mutex被调度程序锁定:
/*
 * Scheduler side of the hand-over: wakes worker `th` and waits until that
 * worker has taken the scheduler mutex, then re-locks the worker's mutex.
 *
 * th: index into gCtx.thread[] (WRITE_THREAD or READ_THREAD).
 */
void resume(int th)
{
/* `i` is written but never read. */
int i = 0;
/* Arm both handshake flags before releasing the worker. */
gCtx.wait_thread_lock_sched = 1;
gCtx.thread[th].wait_sched_lock_thread = 1;
/* NOTE(review): this mutex was locked by this thread, but the worker also
 * locks/unlocks it; confirm the ownership rules for the mutex type used. */
mutex_unlock(&gCtx.thread[th].mutex);
/* Spin until the worker has run lock_scheduler(). */
while (gCtx.wait_thread_lock_sched != 0) {
sched_yield();
}
i = 0;
/* Take the worker's mutex back so its next yield() blocks on it. */
mutex_lock(&gCtx.thread[th].mutex);
gCtx.thread[th].wait_sched_lock_thread = 0;
}
读写线程做完操作之后,用yield()唤醒调度程序,同步之后在自己那把已上锁的mutex上休眠。开头那个看起来奇怪的if是为了防止读写线程运行得太快:线程被唤醒后很快把工作做完,又一次进入yield,而调度程序上次resume的最后一个同步语句还没有完成。这是有可能发生的,只要OS把调度程序切换出去之后一直不给它CPU,调度程序的动作就会一直被搁置。
/*
 * Worker side of the hand-over: releases the buffer mutex and the scheduler
 * mutex, then blocks on the worker's own mutex until resume() unlocks it.
 *
 * th: index of the calling worker in gCtx.thread[].
 */
void yield(int th)
{
/* `i` is never read. */
int i = 0;
/* If resume() has not yet cleared the handshake flag, count the occurrence
 * in `pend` and wait for it to become 0 before yielding. */
if (gCtx.thread[th].wait_sched_lock_thread != 0 ) {
gCtx.thread[th].pend++;
do {
sched_yield();
} while(gCtx.thread[th].wait_sched_lock_thread != 0);
}
mutex_unlock(&bx_mutex);
//printf("thread %d yield\n", th);
/* Releasing `sched` lets main continue past its mutex_lock(&gCtx.sched). */
mutex_unlock(&gCtx.sched);
gCtx.thread[th].wait_sched_lock_thread = -1;
/* Blocks here until resume() unlocks this worker's mutex. */
mutex_lock(&gCtx.thread[th].mutex);
mutex_unlock(&gCtx.thread[th].mutex);
gCtx.thread[th].wait_sched_lock_thread = 0;
}
/*
 * Program entry: initialises the mutexes, starts the writer and reader
 * threads, waits for both to report start-up, then alternates between them
 * with sched()/resume() until the selected thread is marked finished.
 */
int main()
{
int i;
pthread_mutexattr_t mat;
pthread_mutexattr_init(&mat);
pthread_mutexattr_settype(&mat, PTHREAD_MUTEX_NORMAL);
pthread_mutex_init(&bx_mutex, &mat);
pthread_mutex_init(&gCtx.sched, &mat);
pthread_mutex_init(&gCtx.thread[0].mutex, &mat);
pthread_mutex_init(&gCtx.thread[1].mutex, &mat);
/* Lock both worker mutexes first so the workers block on them. */
mutex_lock(&gCtx.thread[0].mutex);
mutex_lock(&gCtx.thread[1].mutex);
start_coroutine((pf) coroutine_write, NULL);
start_coroutine((pf) coroutine_read, NULL);
sched_yield();
/* Each worker sets its flag to 999 on start-up; wait for that, then reset. */
while (gCtx.thread[0].wait_sched_lock_thread == 0) {
sched_yield();
}
gCtx.thread[0].wait_sched_lock_thread = 0;
while (gCtx.thread[1].wait_sched_lock_thread == 0) {
sched_yield();
}
gCtx.thread[1].wait_sched_lock_thread = 0;
/* With run == 1 the first sched() call selects thread 0 (the writer). */
run = 1;
while (TRUE) {
/* This label is not the target of any goto in this function. */
resched:
i = sched();
if (gCtx.thread[i].state == 0) {
resume(i);
/* Blocks until the resumed worker releases `sched` in yield(). */
mutex_lock(&gCtx.sched);
mutex_unlock(&gCtx.sched);
} else break;
}
printf("done\n");
return 0;
}
现在看起来已经很严谨了,好像已经可以正常运行了。但其实不是哦。多线程编程常会遇到各种意想不到的情况。到了这一步,后来还是发生了莫名其妙的crash,但不是每次都出错。然后就是连续不断的调试,直至怀疑系统lib有bug,甚至怀疑是不是自己的机器“热坏了”。好在现在调好了,不用再怀疑最后这两条了。
最后贴上完整代码,感兴趣的可以自己试下。不保证一定没有错误哦!
#include <pthread.h>
#include <setjmp.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Thin wrapper around pthread_mutex_lock().
 *
 * mutex: the mutex to acquire.
 * Returns whatever pthread_mutex_lock() returns (0 on success).
 */
int mutex_lock(pthread_mutex_t * mutex)
{
    int rc;

    rc = pthread_mutex_lock(mutex);
    return rc;
}
/*
 * Thin wrapper around pthread_mutex_unlock().
 *
 * mutex: the mutex to release.
 * Returns whatever pthread_mutex_unlock() returns (0 on success).
 */
int mutex_unlock(pthread_mutex_t * mutex)
{
    int rc;

    rc = pthread_mutex_unlock(mutex);
    return rc;
}
/* Minimal boolean type; BOOL and TRUE are used by start_coroutine() and
 * main(). */
typedef int BOOL;
#define TRUE 1
#define FALSE 0
/* Mutex guarding the line buffer `bx`; the definition appears later in the
 * file, next to `bx`. */
extern pthread_mutex_t bx_mutex;
/*
 * Shared state for the hand-written scheduler (run by main) and the two
 * worker threads.
 */
typedef struct _Context_ {
/* Taken by the running worker in lock_scheduler() and released in yield();
 * main blocks on it after resume() until the worker yields. */
pthread_mutex_t sched;
/* Set to 1 by resume(); cleared by lock_scheduler() once the worker holds
 * `sched`. resume() spins until it reads 0. */
volatile int wait_thread_lock_sched;
/* Set to 1 by bx_read() when the buffer is empty and the end-of-data flag
 * is set. */
int finish;
struct {
/* Per-worker mutex: locked by main before the workers start; a worker
 * blocks on it until resume() unlocks it. */
pthread_mutex_t mutex;
/* Handshake flag: 999 at worker start-up, 1 in resume(), -1 in yield(),
 * 0 once the corresponding step has completed. */
volatile int wait_sched_lock_thread;
/* Incremented by yield() each time it finds the flag above still non-zero. */
int pend;
/* 0 while the worker still has work to do; set to 1 when it is finished. */
int state;
} thread[2];
} Context;
/* Indices into gCtx.thread[]. */
#define WRITE_THREAD 0
#define READ_THREAD 1
/* The single scheduler/worker context shared by all threads. */
Context gCtx;
/* Counter handed out (post-incremented) by start_coroutine() as thread id. */
static int next_thread;
/* Index of the worker most recently selected by sched(). */
int run;
/*
 * Debug helper: prints the handshake flag, pend counter and state of both
 * worker slots, one line per worker.
 */
void report()
{
    int sync_flag;
    int pend_count;
    int done_state;

    /* Snapshot and print worker 0 first ... */
    sync_flag = gCtx.thread[0].wait_sched_lock_thread;
    pend_count = gCtx.thread[0].pend;
    done_state = gCtx.thread[0].state;
    printf("thread 0: locksync %d, pend %d, state %d\n",
           sync_flag, pend_count, done_state);

    /* ... then worker 1, read only after the first line has been printed. */
    sync_flag = gCtx.thread[1].wait_sched_lock_thread;
    pend_count = gCtx.thread[1].pend;
    done_state = gCtx.thread[1].state;
    printf("thread 1: locksync %d, pend %d, state %d\n",
           sync_flag, pend_count, done_state);
}
/*
 * Scheduler side of the hand-over: wakes worker `th`, waits until that
 * worker has taken the scheduler mutex (lock_scheduler() clears
 * wait_thread_lock_sched), then re-locks the worker's mutex so that the
 * worker's next yield() blocks on it.
 *
 * th: index into gCtx.thread[] (WRITE_THREAD or READ_THREAD).
 *
 * The statement order is part of the handshake and must not be changed.
 * NOTE(review): the worker mutex is locked and unlocked by different
 * threads; confirm this is permitted for the mutex type in use.
 */
void resume(int th)
{
    /* Arm both handshake flags before releasing the worker. */
    gCtx.wait_thread_lock_sched = 1;
    gCtx.thread[th].wait_sched_lock_thread = 1;

    /* Let the worker blocked on its own mutex proceed. */
    mutex_unlock(&gCtx.thread[th].mutex);

    /* Spin until the worker has run lock_scheduler(). */
    while (gCtx.wait_thread_lock_sched != 0) {
        sched_yield();
    }

    /* Take the worker's mutex back, then signal that resume() is complete. */
    mutex_lock(&gCtx.thread[th].mutex);
    gCtx.thread[th].wait_sched_lock_thread = 0;
}
/*
 * Called by a worker right after it has been woken: takes the scheduler
 * mutex and then clears the flag that resume() is spinning on.
 */
void lock_scheduler()
{
mutex_lock(&gCtx.sched);
/* Only cleared after the lock is held, so resume() continues afterwards. */
gCtx.wait_thread_lock_sched = 0;
}
/*
 * Worker side of the hand-over: releases the buffer mutex and the scheduler
 * mutex, then blocks on the worker's own mutex until resume() unlocks it.
 *
 * th: index of the calling worker in gCtx.thread[].
 *
 * The caller is expected to hold both bx_mutex and gCtx.sched; every call
 * site in this file acquires them before calling yield().
 * The statement order is part of the handshake and must not be changed.
 */
void yield(int th)
{
    /*
     * A fast worker can get here before the scheduler has finished its
     * previous resume(). Count the occurrence and wait for resume() to
     * clear the flag before handing control back.
     */
    if (gCtx.thread[th].wait_sched_lock_thread != 0) {
        gCtx.thread[th].pend++;
        do {
            sched_yield();
        } while (gCtx.thread[th].wait_sched_lock_thread != 0);
    }

    mutex_unlock(&bx_mutex);

    /* Releasing `sched` lets main continue past mutex_lock(&gCtx.sched). */
    mutex_unlock(&gCtx.sched);
    gCtx.thread[th].wait_sched_lock_thread = -1;

    /* Blocks here until resume() unlocks this worker's mutex. */
    mutex_lock(&gCtx.thread[th].mutex);
    mutex_unlock(&gCtx.thread[th].mutex);
    gCtx.thread[th].wait_sched_lock_thread = 0;
}
/* Number of line slots in the ring buffer. */
#define MAXBUFS 10
/* Ring buffer of text lines passed from the writer to the reader. */
struct buffer {
/* MAXBUFS slots of up to 127 characters plus the terminating NUL. */
char buf[MAXBUFS][128];
/* Index of the next slot to read. */
int pos;
/* Number of slots currently filled. */
int bc;
/* Set to 1 by the writer once the whole input file has been written. */
int eobf;
};
/* The single shared buffer instance. */
struct buffer bx;
/* Mutex taken by the workers around their accesses to `bx`. */
pthread_mutex_t bx_mutex;
/*
 * Reader side of the ring buffer: copies the oldest buffered line into `s`.
 *
 * s: destination buffer.
 * n: size of `s` in bytes; the result is always NUL-terminated when n > 0.
 *
 * If the buffer is empty and the writer has not finished, the reader yields
 * to the scheduler and re-checks the buffer after being resumed. If the
 * buffer is empty and the writer has finished, the reader is marked as
 * finished, `s` is set to the empty string and control is handed back to
 * the scheduler.
 *
 * Exits the process (after printing "error") if bx.pos is out of range.
 */
void bx_read(char *s, int n)
{
    for (;;) {
        if (bx.bc) {
            if (!(bx.pos >= 0 && bx.pos < MAXBUFS)) {
                printf("error\n");
                exit(0);
            }
            /* Guard against n <= 0, which would write before `s`. */
            if (n > 0) {
                strncpy(s, bx.buf[bx.pos], (size_t) n);
                s[n - 1] = '\0';
            }
            bx.bc--;
            bx.pos++;
            if (bx.pos >= MAXBUFS)
                bx.pos = 0;
            return;
        }

        if (bx.eobf) {
            /* Nothing left to read: never leave the caller's buffer unset. */
            if (n > 0)
                s[0] = '\0';
            gCtx.finish = 1;
            /* This path is only executed by the reader, so index its slot
             * directly instead of going through the scheduler's `run`. */
            gCtx.thread[READ_THREAD].state = 1;
            yield(READ_THREAD);
            return;
        }

        /* Buffer empty but more data expected: let the writer run, then
         * re-acquire both locks and check the condition again. */
        yield(READ_THREAD);
        lock_scheduler();
        mutex_lock(&bx_mutex);
    }
}
/*
 * Writer side of the ring buffer: stores the string `s` in the next free
 * slot.
 *
 * s: NUL-terminated line to store.
 * n: length of `s` including the terminating NUL; lines of 128 bytes or
 *    more are truncated to 127 characters.
 *
 * While the buffer is full the writer yields to the scheduler and re-checks
 * the condition every time it is resumed.
 * Exits the process (after printing "error") if the computed slot is out of
 * range.
 */
void bx_write(char *s, int n)
{
    int slot;

    /* Wait for a free slot, re-testing after every wake-up. */
    for (;;) {
        if (bx.bc < MAXBUFS)
            break;
        yield(WRITE_THREAD);
        lock_scheduler();
        mutex_lock(&bx_mutex);
    }

    /* First free slot sits bx.bc entries after the read position. */
    slot = bx.pos + bx.bc;
    if (slot >= MAXBUFS)
        slot -= MAXBUFS;
    if (slot < 0 || slot >= MAXBUFS) {
        printf("error\n");
        exit(0);
    }

    if (n >= 128) {
        strncpy(bx.buf[slot], s, 128);
        bx.buf[slot][127] = '\0';
    } else {
        strcpy(bx.buf[slot], s);
    }
    bx.bc++;
}
/* Thread entry-point type accepted by pthread_create(). */
typedef void *(*pf) (void *);
/* Starts `func` on a new detached thread; defined further down. */
BOOL start_coroutine(pf func, void *arg);
/* Forward declaration of the reader thread body.
 * NOTE(review): it returns void but is cast to `pf` at the call sites. */
void coroutine_read(void *arg);
/*
 * Writer thread body: waits to be resumed by the scheduler, then reads
 * "b.c" line by line and pushes each line (one trailing "\n" and one
 * trailing "\r" removed) into the shared buffer with bx_write(). When the
 * file is exhausted it sets the end-of-data flag, marks itself finished and
 * yields to the scheduler.
 *
 * arg: value handed over by pthread_create(); not used.
 *
 * Terminates the process with EXIT_FAILURE if "b.c" cannot be opened.
 */
void coroutine_write(void *arg)
{
    FILE *fp;
    char s[128];
    int len;
    int i = WRITE_THREAD;

    (void) arg;

    /* Tell main (which spins while this flag is 0) that we have started. */
    gCtx.thread[i].wait_sched_lock_thread = 999;

    /* Block until resume() unlocks our mutex, then release it again. */
    mutex_lock(&gCtx.thread[i].mutex);
    mutex_unlock(&gCtx.thread[i].mutex);

    /* Take the scheduler mutex so main blocks until we yield. */
    lock_scheduler();

    /* Wait until resume() has finished and reset the handshake flag. */
    while (gCtx.thread[i].wait_sched_lock_thread != 0) {
        sched_yield();
    }

    fp = fopen("b.c", "r");
    if (!fp) {
        printf("can not open `b.c'\n");
        /* A missing input file is a failure, not a successful run. */
        exit(EXIT_FAILURE);
    }

    mutex_lock(&bx_mutex);
    while (fgets(s, sizeof s, fp) != NULL) {
        len = (int) strlen(s);
        /* Strip one trailing '\n', then one trailing '\r'. */
        if (len > 0 && s[len - 1] == '\n')
            s[--len] = '\0';
        if (len > 0 && s[len - 1] == '\r')
            s[--len] = '\0';
        /* len + 1 counts the terminating NUL. */
        bx_write(s, len + 1);
    }

    /* Signal end of data and mark the writer as finished. */
    bx.eobf = 1;
    gCtx.thread[WRITE_THREAD].state = 1;
    fclose(fp);
    printf("[WRITE_THREAD FINISHED]\n");
    report();

    /* Hand control back to the scheduler. */
    yield(WRITE_THREAD);
}
/*
 * Reader thread body: waits to be resumed by the scheduler, then pulls
 * lines out of the shared buffer with bx_read() and prints them until the
 * writer has signalled end of data and the buffer is empty. Finally it
 * marks itself finished and yields to the scheduler.
 *
 * arg: value handed over by pthread_create(); not used.
 */
void coroutine_read(void *arg)
{
    /* Start out as an empty string so the printf below never reads
     * indeterminate bytes, even if bx_read() returns without filling it. */
    char buf[128] = "";
    int i = READ_THREAD;

    (void) arg;

    /* Tell main (which spins while this flag is 0) that we have started. */
    gCtx.thread[i].wait_sched_lock_thread = 999;

    /* Block until resume() unlocks our mutex, then release it again. */
    mutex_lock(&gCtx.thread[i].mutex);
    mutex_unlock(&gCtx.thread[i].mutex);

    /* Take the scheduler mutex so main blocks until we yield. */
    lock_scheduler();

    /* Wait until resume() has finished and reset the handshake flag. */
    while (gCtx.thread[i].wait_sched_lock_thread != 0) {
        sched_yield();
    }

    mutex_lock(&bx_mutex);
    /* Keep reading while the writer is not done or lines are buffered. */
    while (!bx.eobf || bx.bc) {
        bx_read(buf, 128);
        printf("%s\n", buf);
    }

    gCtx.thread[READ_THREAD].state = 1;
    printf("[READ_THREAD FINISHED]\n");
    report();

    /* Hand control back to the scheduler. */
    yield(READ_THREAD);
}
/*
 * Starts `func` on a new detached thread.
 *
 * func: thread entry point.
 * arg:  not used; the thread receives its sequential id (taken from
 *       next_thread) as its argument instead.
 *
 * Returns TRUE when the thread was created, 0 (FALSE) when pthread_create()
 * failed. On failure no detach is attempted, because the thread handle is
 * not valid in that case.
 */
BOOL start_coroutine(pf func, void *arg)
{
    pthread_t pt_id;
    int thread_id = next_thread++;

    (void) arg;

    /* Go through intptr_t so the int-to-pointer conversion is well-sized. */
    if (pthread_create(&pt_id, NULL, func, (void *) (intptr_t) thread_id) != 0) {
        return 0; /* FALSE: the thread was not created */
    }
    pthread_detach(pt_id);
    return TRUE;
}
/*
 * Picks the worker to run next: switches to the other worker when that one
 * is still unfinished (state == 0), otherwise stays on the current one.
 *
 * Updates the global `run` and returns its new value.
 */
int sched()
{
    int candidate = (run == 0) ? 1 : 0;

    if (gCtx.thread[candidate].state != 0)
        return run;

    run = candidate;
    return run;
}
/*
 * Program entry: initialises the mutexes, starts the writer and reader
 * threads, waits for both to report start-up, then alternates between them
 * with sched()/resume() until the selected worker is marked finished.
 *
 * Returns 0 after printing "done".
 */
int main()
{
    int i;
    pthread_mutexattr_t mat;

    /* Create every mutex explicitly as PTHREAD_MUTEX_NORMAL. */
    pthread_mutexattr_init(&mat);
    pthread_mutexattr_settype(&mat, PTHREAD_MUTEX_NORMAL);
    pthread_mutex_init(&bx_mutex, &mat);
    pthread_mutex_init(&gCtx.sched, &mat);
    pthread_mutex_init(&gCtx.thread[0].mutex, &mat);
    pthread_mutex_init(&gCtx.thread[1].mutex, &mat);
    /* The attribute object is no longer needed once the mutexes exist. */
    pthread_mutexattr_destroy(&mat);

    /* Lock both worker mutexes first so the workers block on them. */
    mutex_lock(&gCtx.thread[0].mutex);
    mutex_lock(&gCtx.thread[1].mutex);

    start_coroutine((pf) coroutine_write, NULL);
    start_coroutine((pf) coroutine_read, NULL);
    sched_yield();

    /* Each worker sets its flag to 999 on start-up; wait for it, then reset. */
    while (gCtx.thread[0].wait_sched_lock_thread == 0) {
        sched_yield();
    }
    gCtx.thread[0].wait_sched_lock_thread = 0;
    while (gCtx.thread[1].wait_sched_lock_thread == 0) {
        sched_yield();
    }
    gCtx.thread[1].wait_sched_lock_thread = 0;

    /* With run == 1 the first sched() call selects thread 0 (the writer). */
    run = 1;
    for (;;) {
        i = sched();
        if (gCtx.thread[i].state != 0)
            break;
        resume(i);
        /* Blocks until the resumed worker releases `sched` in yield(). */
        mutex_lock(&gCtx.sched);
        mutex_unlock(&gCtx.sched);
    }

    printf("done\n");
    return 0;
}
最后的代码里多出了一个bx_mutex锁,用来保护bx缓冲区。程序实在调不出来才被逼加上的,但是加上了还是没用!现在如果去掉,又要做很复杂的调试,已经没有心情去做了。
最后解释一下,究竟出了什么bug,这么难搞。后来又是改在哪里好起来的。供大家参考。缓冲区bx_write上来有这一句:
int wpos;
while (bx.bc >= MAXBUFS) {
yield(WRITE_THREAD);
lock_scheduler();
mutex_lock(&bx_mutex);
}
while 原来写的是if。查我的前文可以看到对比。为什么这么改?linux内核编程中有告诫,睡眠进程被唤醒之后,需要重新检查它等待的条件,不能直接认为它等待的条件已经成立。如果不成立,需要重新进入睡眠。