作业地址:Threads and Locking
这次作业的目的在于让我们对多线程有个大概的了解,以及多线程可能引发的问题,通常可以通过加锁来解决。
源码下载地址:ph.c
源码如下:
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <assert.h>
#include <pthread.h>
#include <sys/time.h>
#define SOL
#define NBUCKET 5
#define NKEYS 100000
struct entry {
int key;
int value;
struct entry *next;
};
struct entry *table[NBUCKET];
int keys[NKEYS];
int nthread = 1;
volatile int done;
double
now()
{
struct timeval tv;
gettimeofday(&tv, 0);
return tv.tv_sec + tv.tv_usec / 1000000.0;
}
static void
print(void)
{
int i;
struct entry *e;
for (i = 0; i < NBUCKET; i++) {
printf("%d: ", i);
for (e = table[i]; e != 0; e = e->next) {
printf("%d ", e->key);
}
printf("\n");
}
}
static void
insert(int key, int value, struct entry **p, struct entry *n)
{
struct entry *e = malloc(sizeof(struct entry));
e->key = key;
e->value = value;
e->next = n;
*p = e;
}
static
void put(int key, int value)
{
int i = key % NBUCKET;
insert(key, value, &table[i], table[i]);
}
static struct entry*
get(int key)
{
struct entry *e = 0;
for (e = table[key % NBUCKET]; e != 0; e = e->next) {
if (e->key == key) break;
}
return e;
}
static void *
thread(void *xa)
{
long n = (long) xa;
int i;
int b = NKEYS/nthread;
int k = 0;
double t1, t0;
// printf("b = %d\n", b);
t0 = now();
for (i = 0; i < b; i++) {
// printf("%d: put %d\n", n, b*n+i);
put(keys[b*n + i], n);
}
t1 = now();
printf("%ld: put time = %f\n", n, t1-t0);
// Should use pthread_barrier, but MacOS doesn't support it ...
__sync_fetch_and_add(&done, 1);
while (done < nthread) ;
t0 = now();
for (i = 0; i < NKEYS; i++) {
struct entry *e = get(keys[i]);
if (e == 0) k++;
}
t1 = now();
printf("%ld: get time = %f\n", n, t1-t0);
printf("%ld: %d keys missing\n", n, k);
return NULL;
}
int
main(int argc, char *argv[])
{
pthread_t *tha;
void *value;
long i;
double t1, t0;
if (argc < 2) {
fprintf(stderr, "%s: %s nthread\n", argv[0], argv[0]);
exit(-1);
}
nthread = atoi(argv[1]);
tha = malloc(sizeof(pthread_t) * nthread);
srandom(0);
assert(NKEYS % nthread == 0);
for (i = 0; i < NKEYS; i++) {
keys[i] = random();
}
t0 = now();
for(i = 0; i < nthread; i++) {
assert(pthread_create(&tha[i], NULL, thread, (void *) i) == 0);
}
for(i = 0; i < nthread; i++) {
assert(pthread_join(tha[i], &value) == 0);
}
t1 = now();
printf("completion time = %f\n", t1-t0);
}
运行结果如下:
1: put time = 0.007865
0: put time = 0.007960
1: get time = 7.577803
1: 18289 keys missing
0: get time = 7.589584
0: 18289 keys missing
completion time = 7.597770
可以看到进程1和进程2都丢失了key,原因在于线程在put的时候可能被另一个进程打断,导致ker[]数组里面有些值没有被添加进入链表中。但是由于两个进程都是最后检查的同一个table,所以丢失的key是一样多的。
接着为线程加锁:
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <assert.h>
#include <pthread.h>
#include <sys/time.h>
#define SOL
#define NBUCKET 5
#define NKEYS 100000
#define ADD_LOCK
#ifdef ADD_LOCK
pthread_mutex_t lock;
#endif
struct entry {
int key;
int value;
struct entry *next;
};
struct entry *table[NBUCKET];
int keys[NKEYS];
int nthread = 1;
volatile int done;
double
now()
{
struct timeval tv;
gettimeofday(&tv, 0);
return tv.tv_sec + tv.tv_usec / 1000000.0;
}
static void
print(void)
{
int i;
struct entry *e;
for (i = 0; i < NBUCKET; i++) {
printf("%d: ", i);
for (e = table[i]; e != 0; e = e->next) {
printf("%d ", e->key);
}
printf("\n");
}
}
static void
insert(int key, int value, struct entry **p, struct entry *n)
{
struct entry *e = malloc(sizeof(struct entry));
e->key = key;
e->value = value;
e->next = n;
*p = e;
}
static
void put(int key, int value)
{
#ifdef ADD_LOCK
pthread_mutex_lock(&lock);
#endif
int i = key % NBUCKET;
insert(key, value, &table[i], table[i]);
#ifdef ADD_LOCK
pthread_mutex_unlock(&lock);
#endif
}
static struct entry*
get(int key)
{
#ifdef ADD_LOCK
pthread_mutex_lock(&lock);
#endif
struct entry *e = 0;
for (e = table[key % NBUCKET]; e != 0; e = e->next) {
if (e->key == key) break;
}
#ifdef ADD_LOCK
pthread_mutex_unlock(&lock);
#endif
return e;
}
static void *
thread(void *xa)
{
long n = (long) xa;
int i;
int b = NKEYS/nthread;
int k = 0;
double t1, t0;
// printf("b = %d\n", b);
t0 = now();
for (i = 0; i < b; i++) {
// printf("%d: put %d\n", n, b*n+i);
put(keys[b*n + i], n);
}
t1 = now();
printf("%ld: put time = %f\n", n, t1-t0);
// Should use pthread_barrier, but MacOS doesn't support it ...
__sync_fetch_and_add(&done, 1);
while (done < nthread) ;
t0 = now();
for (i = 0; i < NKEYS; i++) {
struct entry *e = get(keys[i]);
if (e == 0) k++;
}
t1 = now();
printf("%ld: get time = %f\n", n, t1-t0);
printf("%ld: %d keys missing\n", n, k);
return NULL;
}
int
main(int argc, char *argv[])
{
pthread_t *tha;
void *value;
long i;
double t1, t0;
if (argc < 2) {
fprintf(stderr, "%s: %s nthread\n", argv[0], argv[0]);
exit(-1);
}
#ifdef ADD_LOCK
pthread_mutex_init(&lock,NULL);
#endif
nthread = atoi(argv[1]);
tha = malloc(sizeof(pthread_t) * nthread);
srandom(0);
assert(NKEYS % nthread == 0);
for (i = 0; i < NKEYS; i++) {
keys[i] = random();
}
t0 = now();
for(i = 0; i < nthread; i++) {
assert(pthread_create(&tha[i], NULL, thread, (void *) i) == 0);
}
for(i = 0; i < nthread; i++) {
assert(pthread_join(tha[i], &value) == 0);
}
t1 = now();
printf("completion time = %f\n", t1-t0);
}
gcc -g -O2 ph.c -pthread
结果为:
0: put time = 0.022174
1: put time = 0.024050
1: get time = 15.264443
1: 0 keys missing
0: get time = 15.264444
0: 0 keys missing
completion time = 15.288895
事实上,错误发生在put的时候,get的时候因为都是只读,所以不会造成错误。当注释掉get里面的锁之后,再编译运行:
0: put time = 0.032169
1: put time = 0.032082
0: get time = 7.478483
0: 0 keys missing
1: get time = 7.478694
1: 0 keys missing
completion time = 7.511080
效率提高了不少。
总结:
多线程设计到对内存写的时候,一定要考虑数据不一定正确的情况,在这种情况下可以通过加锁来防止错误发生。
END.