手写死锁检测组件

死锁发生的原因

当一个进程/线程在等待永远不可能获取到的资源时,就会产生死锁。

死锁产生的四个必要条件:

  • 资源互斥
  • 每个进程占有资源并等待其他资源
  • 系统不能剥夺进程资源
  • 进程资源图是一个环路

如下图所示,线程A占有资源1,线程B占有资源2,线程C占有资源3。如果线程A要申请资源2,线程B要申请资源3,线程C要申请资源1,线程之间就形成了一个资源依赖的环路,就会发生死锁。

image.png

image.png

#include <pthread.h>
#include <unistd.h>

/* The two mutexes that thread_routine_a and thread_routine_b acquire in opposite order. */
pthread_mutex_t mtx1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx2 = PTHREAD_MUTEX_INITIALIZER;

/*
 * Demo thread A: locks mtx1, sleeps one second, then locks mtx2.
 * thread_routine_b takes the same two mutexes in the opposite order, which is
 * what makes this pair of routines able to block each other.
 *
 * arg is unused. Returns NULL.
 */
void *thread_routine_a(void *arg) {

    (void)arg;

    pthread_mutex_lock(&mtx1);
    sleep(1);

    pthread_mutex_lock(&mtx2);

    pthread_mutex_unlock(&mtx1);
    pthread_mutex_unlock(&mtx2);

    /* A thread start routine is declared to return void *, so return a value. */
    return NULL;

}

/*
 * Demo thread B: locks mtx2, sleeps one second, then locks mtx1.
 * thread_routine_a takes the same two mutexes in the opposite order, which is
 * what makes this pair of routines able to block each other.
 *
 * arg is unused. Returns NULL.
 */
void *thread_routine_b(void *arg) {

    (void)arg;

    pthread_mutex_lock(&mtx2);
    sleep(1);

    pthread_mutex_lock(&mtx1);

    pthread_mutex_unlock(&mtx2);
    pthread_mutex_unlock(&mtx1);

    /* A thread start routine is declared to return void *, so return a value. */
    return NULL;

}


/* Starts the two demo threads and waits for both of them to finish. */
int main() {

    pthread_t workers[2];

    pthread_create(&workers[0], NULL, thread_routine_a, NULL);
    pthread_create(&workers[1], NULL, thread_routine_b, NULL);

    for (int w = 0; w < 2; w++) {
        pthread_join(workers[w], NULL);
    }

    return 0;

}

死锁的解决方法

死锁产生后,解决措施是打破四大条件,有下列方法

  • 死锁预防:采用某种策略限制并发进程对于资源的请求,破坏死锁产生的四个条件之一,使系统任何时刻都不满足死锁的条件。
  • 死锁避免:一般采用银行家算法来避免,银行家算法,就是提前计算出一条不会死锁的资源分配方法,才分配资源,否则不分配资源,相当于借贷,考虑对方还得起才借钱。
  • 死锁检测:允许死锁产生,但系统定时运行一个检测死锁的程序,若检测到系统中发生死锁,则设法加以解除。
  • 死锁解除:即死锁发生后的解除方法,如强制剥夺资源,撤销进程等。

本文只讨论死锁检测的实现。

死锁检测的实现原理

通过检测是否存在资源依赖的环路,来检测是否存在死锁。资源依赖环采用有向图来存储。线程A获取线程B已占用的锁,则为线程A指向线程B。启动一个线程来检测图是否有环的存在。

数据结构定义

/*
 * Record used both as the payload of a graph vertex and as an entry of the
 * lock table (thread_graph.locklist).
 */
typedef struct _node {

    uint64_t threadid; /* thread identifier; vertices are looked up by this field */
    uint64_t lockid;   /* mutex address stored as an integer; 0 marks a free lock-table slot */

    int degress;       /* counter incremented in lock_before and decremented in lock_after */

} node;

/*
 * Cell of a singly linked adjacency list. In thread_graph.list the array
 * element is the vertex itself; the cells chained through `next` are the
 * targets of its outgoing edges.
 */
typedef struct _vertex {

    node n;                 /* payload (matched on n.threadid) */
    struct _vertex *next;   /* next edge target, or NULL at the end of the list */


} vertex;

/* Global state of the detector: the thread graph plus the lock ownership table. */
typedef struct _thread_graph {

    vertex list[MAX]; // selfid -> thid: adjacency-list heads, recording which threads each thread points to
    int num;          // number of entries of list[] in use

    
    node locklist[MAX]; // which thread currently holds each mutex
    int lockidx;        // upper bound (exclusive) of the locklist[] slots that are scanned

} thread_graph;

图的操作方法及检测图中是否有环

thread_graph *tg = NULL; /* the single graph instance; allocated in start_check / the test main */
char visited[MAX];       /* per-vertex visit marks used by dfs / search_for_cycle */
int path[MAX];           /* vertex indices of the path currently being explored */
int k = 0;               /* number of entries of path[] in use */
int deadlock = 0;        /* set to 1 by dfs when it reports a cycle */


/*
 * Allocates a list cell holding a copy of n, with no successor.
 * Returns the new cell, or NULL when the allocation fails.
 * The caller owns the returned cell.
 */
vertex *create_vertex(node n) {

    vertex *cell = (vertex *)calloc(1, sizeof *cell);

    if (cell != NULL) {
        cell->n = n;
        cell->next = NULL;
    }

    return cell;
}

/*
 * Looks up the vertex whose threadid equals n.threadid.
 * Returns its index in tg->list, or -1 when no such vertex exists.
 */
int search_vertex(node n) {

    int pos = 0;

    while (pos < tg->num) {
        if (tg->list[pos].n.threadid == n.threadid) {
            return pos;
        }
        pos++;
    }

    return -1;

}

/*
 * Adds a vertex for n.threadid unless one already exists.
 * When the vertex table already holds MAX entries the vertex is not added
 * and a message is written to stderr, instead of writing past tg->list.
 */
void add_vertex(node n) {

    if (search_vertex(n) != -1) {
        return; /* already present */
    }

    if (tg->num >= MAX) {
        fprintf(stderr, "add_vertex: vertex table full (%d entries)\n", MAX);
        return;
    }

    tg->list[tg->num].n = n;
    tg->list[tg->num].next = NULL;
    tg->num ++;

}

/*
 * Appends an edge from -> to, creating either vertex first if necessary.
 * The edge is stored as a new cell at the tail of from's adjacency list.
 * If create_vertex returns NULL (allocation failure) no edge is appended.
 * Does nothing when the `from` vertex cannot be found after add_vertex.
 */
void add_edge(node from, node to) {

    add_vertex(from);
    add_vertex(to);

    /* Never index tg->list with -1. */
    int src = search_vertex(from);
    if (src < 0) {
        return;
    }

    vertex *vtx = &tg->list[src];

    while (vtx->next != NULL) {
        vtx = vtx->next;
    }

    vtx->next = create_vertex(to);

}

/*
 * Tests whether the edge from -> to is present.
 * Returns 1 when to.threadid appears in from's adjacency list, 0 otherwise
 * (including when `from` is not a vertex of the graph).
 */
int verify_edge(node from, node to) {

    int idx = search_vertex(from);
    if (idx == -1) {
        return 0;
    }

    /*
     * tg->list[idx] is the vertex itself, not an edge; the edges start at
     * its `next` cell. Starting the scan at the head would report a
     * self-edge as existing even when none was ever added.
     */
    vertex *vtx = tg->list[idx].next;

    while (vtx != NULL) {

        if (vtx->n.threadid == to.threadid) return 1;

        vtx = vtx->next;

    }

    return 0;

}

/*
 * Removes the first edge from -> to and frees its list cell.
 * Both vertices must exist in the graph.
 * Returns 1 when an edge was removed, 0 when there was nothing to remove.
 */
int remove_edge(node from, node to) {

    int idxi = search_vertex(from);
    int idxj = search_vertex(to);

    if (idxi == -1 || idxj == -1) {
        return 0;
    }

    vertex *v = &tg->list[idxi];

    while (v->next != NULL) {

        if (v->next->n.threadid == to.threadid) {

            /* Unlink the cell before releasing it. */
            vertex *temp = v->next;
            v->next = temp->next;

            free(temp);
            return 1;

        }

        v = v->next;
    }

    return 0;

}

/*
 * Prints the thread ids recorded in path[0..k-1] as
 * "deadlock : a --> b --> ... --> z".
 * Prints nothing when the path is empty.
 */
void print_deadlock(void) {

    int i = 0;

    if (k <= 0) {
        return;
    }

    printf("deadlock : ");
    for (i = 0;i < k-1; i ++) {

        /* threadid is uint64_t: print it as unsigned with a matching specifier */
        printf("%llu --> ", (unsigned long long)tg->list[path[i]].n.threadid);

    }

    printf("%llu\n", (unsigned long long)tg->list[path[i]].n.threadid);

}

/*
 * Depth-first walk from vertex idx that reports a cycle when it reaches a
 * vertex that is still on the current path.
 *
 * visited[] values: 0 = not seen, 1 = on the current path, 2 = fully explored.
 * Every call pushes exactly one entry onto path[]; the caller pops it (k--).
 *
 * Returns 0 when idx closes a cycle (deadlock is set to 1 and the path is
 * printed), 1 otherwise.
 *
 * NOTE(review): path[] has MAX entries; a path through all MAX vertices plus
 * the closing vertex would need one more slot - confirm MAX is large enough.
 */
int dfs(int idx) {

    vertex *vtx = &tg->list[idx];

    path[k++] = idx;

    if (visited[idx] == 1) {
        // reached a vertex that is still on the current path: deadlock
        deadlock = 1;

        print_deadlock();

        return 0;
    }

    if (visited[idx] == 2) {
        // already explored through another path: reaching it again is not a cycle
        return 1;
    }

    visited[idx] = 1;

    while (vtx->next) {

        int next_idx = search_vertex(vtx->next->n);
        if (next_idx >= 0) {
            dfs(next_idx);
            k--; // pop the entry pushed by the call above
        }

        vtx = vtx->next;

    }

    // leaving the path: only vertices marked 1 may close a cycle
    visited[idx] = 2;

    return 1;

}

int search_for_cycle(int idx) {

    vertex *vtx = &tg->list[idx];
    visited[idx] = 1;
    k = 0;
    path[k++] = idx;

    while (vtx->next != NULL) {

        int i = 0;
        for (i = 0;i < tg->num;i ++) {
            if (i == idx) continue;
            
            visited[i] = 0;
        }

        for (i = 1;i <= MAX;i ++) {
            path[i] = -1;
        }
        k = 1;

        dfs(search_vertex(vtx->next->n));
        vtx = vtx->next;
    }

}

测试代码

#if 1 // graph cycle test
int main() {


	tg = (thread_graph*)malloc(sizeof(thread_graph));
	tg->num = 0;

	node v1;
	v1.threadid = 1;
	add_vertex(v1);

	node v2;
	v2.threadid = 2;
	add_vertex(v2);

	node v3;
	v3.threadid = 3;
	add_vertex(v3);

	node v4;
	v4.threadid = 4;
	add_vertex(v4);

	
	node v5;
	v5.threadid = 5;
	add_vertex(v5);


	add_edge(v1, v2);
	add_edge(v2, v3);
	add_edge(v3, v4);
	add_edge(v4, v5);
	add_edge(v3, v1);
	
	search_for_cycle(search_vertex(v1));

}
#endif

测试结果

image.png

 对lock/unlock进行hook

怎样去构建图中的数据呢?就需要在lock、unlock的时候去维护图的结构,比较好的方法就是对pthread_mutex_lock和pthread_mutex_unlock这两个函数进行hook, 让程序中调用这两个函数的地方,都使用我们自己定义的函数。

/* Pointer to the next pthread_mutex_lock in lookup order; set by init_hook via dlsym(RTLD_NEXT, ...). */
typedef int (*pthread_mutex_lock_t)(pthread_mutex_t *mutex);
pthread_mutex_lock_t pthread_mutex_lock_f;

/* Pointer to the next pthread_mutex_unlock in lookup order; set by init_hook via dlsym(RTLD_NEXT, ...). */
typedef int (*pthread_mutex_unlock_t)(pthread_mutex_t *mutex);
pthread_mutex_unlock_t pthread_mutex_unlock_f;


/*
 * Hooked pthread_mutex_lock: updates the thread graph before and after
 * forwarding to the real implementation through pthread_mutex_lock_f.
 * init_hook must have been called first, otherwise pthread_mutex_lock_f
 * is still NULL.
 *
 * Returns the result of the real pthread_mutex_lock.
 */
int pthread_mutex_lock(pthread_mutex_t *mutex) {

    pthread_t selfid = pthread_self();

    lock_before(selfid, (uint64_t)mutex);

    /* casts make the arguments match %lu and %p exactly */
    printf("pthread_mutex_lock selfid: %lu, mutex: %p\n", (unsigned long)selfid, (void *)mutex);

    int rc = pthread_mutex_lock_f(mutex);

    lock_after(selfid, (uint64_t)mutex);

    /* callers rely on the return value, so propagate it */
    return rc;

}

/*
 * Hooked pthread_mutex_unlock: forwards to the real implementation through
 * pthread_mutex_unlock_f and then updates the lock table.
 * init_hook must have been called first, otherwise pthread_mutex_unlock_f
 * is still NULL.
 *
 * Returns the result of the real pthread_mutex_unlock.
 */
int pthread_mutex_unlock(pthread_mutex_t *mutex) {

    pthread_t selfid = pthread_self();

    printf("pthread_mutex_unlock\n");

    int rc = pthread_mutex_unlock_f(mutex);

    unlock_after(selfid, (uint64_t)mutex);

    /* callers rely on the return value, so propagate it */
    return rc;

}

/*
 * Resolves the next definitions of pthread_mutex_lock / pthread_mutex_unlock
 * with dlsym(RTLD_NEXT, ...) and stores them in pthread_mutex_lock_f /
 * pthread_mutex_unlock_f.
 *
 * Returns 0 when both symbols were found, -1 otherwise.
 */
static int init_hook() {

    pthread_mutex_lock_f = (pthread_mutex_lock_t)dlsym(RTLD_NEXT, "pthread_mutex_lock");

    pthread_mutex_unlock_f = (pthread_mutex_unlock_t)dlsym(RTLD_NEXT, "pthread_mutex_unlock");

    if (pthread_mutex_lock_f == NULL || pthread_mutex_unlock_f == NULL) {
        return -1;
    }

    return 0;

}

lock_before、lock_after和unlock_after

lock_before、lock_after和unlock_after在系统调用的前后对图进行操作, 它们是整个死锁检测实现的核心。

lock_before

lock_before 是获取锁之前的操作:检查锁当前是否被占用,如果被占用,则需要添加一条边,由当前线程指向当前占有该锁的线程。

/*
 * Called before the real pthread_mutex_lock.
 * If lockid is present in the lock table, the calling thread is about to
 * wait for it: the entry's counter is incremented and an edge
 * threadid -> recorded holder is added unless it already exists.
 *
 * threadid: id of the calling thread.
 * lockid:   mutex address as an integer.
 */
void lock_before(uint64_t threadid, uint64_t lockid) {

    int idx = 0;
    for (idx = 0; idx < tg->lockidx; idx++) {

        if (lockid == tg->locklist[idx].lockid) {

            /* zero-initialise: the whole struct is copied into the graph */
            node from = {0};
            from.threadid = threadid;
            add_vertex(from);

            node to = {0};
            to.threadid = tg->locklist[idx].threadid;
            add_vertex(to);

            tg->locklist[idx].degress++; // inc

            if (!verify_edge(from, to)) {
                add_edge(from, to);
                // Cycle detection could be added here to implement deadlock prevention:
                // if the new edge closes a cycle, remove_edge it again.
            }

        }

    }

}

/*
 * Finds the lock-table slot that records lockid.
 * Returns the slot index, or -1 when lockid is not in the table.
 */
int search_lock(uint64_t lockid) {

    int slot = 0;

    while (slot < tg->lockidx) {
        if (tg->locklist[slot].lockid == lockid) {
            return slot;
        }
        slot++;
    }

    return -1;

}

lock_after

lock_after, 就是获取锁成功后的操作。

需要check锁之前是否被其他线程占有。如果之前没有被其他线程占有,则找到合适位置来放锁的信息,并且需要将锁的占有者设置为当前线程;如果锁之前是被其他线程占有,则需要删除edge。


/*
 * Finds a lock-table slot that can take a new entry.
 * Returns the index of the first slot below tg->lockidx whose lockid is 0,
 * or tg->lockidx itself when there is none.
 * The lockid parameter is not used.
 */
int search_empty_lock(uint64_t lockid) {

    int slot = 0;

    while (slot < tg->lockidx && tg->locklist[slot].lockid != 0) {
        slot++;
    }

    /* either the first free slot, or tg->lockidx when the scan ran to the end */
    return slot;

}

void lock_after(uint64_t threadid, uint64_t lockid) {
    
    int idx = 0;
    if (-1 == (idx = search_lock(lockid))) {

        int emp_idx = search_empty_lock(lockid);
        tg->locklist[emp_idx].lockid = lockid;
        tg->locklist[emp_idx].threadid = threadid;

        tg->lockidx++; // inc

    } else {

        node from;
        from.threadid = threadid;
        add_vertex(from);

        node to;
        to.threadid = tg->locklist[idx].threadid;
        add_vertex(to);

        tg->locklist[idx].degress--; // inc

        if (!verify_edge(from, to)) {
            remove_edge(from, to);
        }

        tg->locklist[idx].threadid = threadid;

    }

}

unlock_after

unlock_after, 就是释放锁后的操作。需要将锁的信息清空。

/*
 * Called after the real pthread_mutex_unlock.
 * Clears the lock-table slot of lockid when its counter is 0; a slot with a
 * non-zero counter is left in place.
 * Does nothing when lockid is not in the table.
 *
 * threadid is not used.
 */
void unlock_after(uint64_t threadid, uint64_t lockid) {

    (void)threadid;

    int idx = search_lock(lockid);
    if (idx == -1) {
        return; /* untracked mutex: never index locklist with -1 */
    }

    if (tg->locklist[idx].degress == 0) {

        tg->locklist[idx].lockid = 0;
        tg->locklist[idx].threadid = 0;

    }

}

启动线程进行检测

/*
 * Runs one detection pass: clears the deadlock flag, searches for a cycle
 * from each vertex in turn until the flag is set, and prints
 * "no deadlock" when the flag is still clear afterwards.
 */
void check_dead_lock(void) {

    deadlock = 0;

    for (int v = 0; v < tg->num && deadlock != 1; v++) {
        search_for_cycle(v);
    }

    if (!deadlock) {
        printf("no deadlock\n");
    }

}

/*
 * Body of the detector thread: sleeps five seconds, runs check_dead_lock,
 * and repeats forever. args is unused.
 */
static void *thread_routine(void *args) {

    (void)args;

    for (;;) {
        sleep(5);
        check_dead_lock();
    }

    /* not reached */
    return NULL;

}

/*
 * Allocates the zero-filled global graph and starts the detector thread.
 * Terminates the process when the graph cannot be allocated, because the
 * lock hooks dereference tg unconditionally. A failure to start the
 * detector thread is reported on stderr.
 */
void start_check(void) {

    /* calloc leaves num, lockidx and every table slot at zero */
    tg = (thread_graph*)calloc(1, sizeof(thread_graph));
    if (tg == NULL) {
        perror("calloc");
        exit(EXIT_FAILURE);
    }

    pthread_t tid;

    int rc = pthread_create(&tid, NULL, thread_routine, NULL);
    if (rc != 0) {
        fprintf(stderr, "start_check: pthread_create failed (%d)\n", rc);
    }

}

测试


/* Four mutexes; each test thread holds one and then requests the next (a: 1->2, b: 2->3, c: 3->4, d: 4->1). */
pthread_mutex_t mtx1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx2 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx3 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx4 = PTHREAD_MUTEX_INITIALIZER;

/*
 * Test thread a: holds mtx1, sleeps one second, then requests mtx2.
 * Releases in reverse order. arg is unused; returns NULL.
 */
void *thread_routine_a(void *arg) {

    pthread_mutex_t *held = &mtx1;
    pthread_mutex_t *wanted = &mtx2;

    (void)arg;

    printf("thread_routine a\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;
}

/*
 * Test thread b: holds mtx2, sleeps one second, then requests mtx3.
 * Releases in reverse order. arg is unused; returns NULL.
 */
void *thread_routine_b(void *arg) {

    pthread_mutex_t *held = &mtx2;
    pthread_mutex_t *wanted = &mtx3;

    (void)arg;

    printf("thread_routine b\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;

}

/*
 * Test thread c: holds mtx3, sleeps one second, then requests mtx4.
 * Releases in reverse order. arg is unused; returns NULL.
 */
void *thread_routine_c(void *arg) {

    pthread_mutex_t *held = &mtx3;
    pthread_mutex_t *wanted = &mtx4;

    (void)arg;

    printf("thread_routine c\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;

}

/*
 * Test thread d: holds mtx4, sleeps one second, then requests mtx1.
 * Releases in reverse order. arg is unused; returns NULL.
 */
void *thread_routine_d(void *arg) {

    pthread_mutex_t *held = &mtx4;
    pthread_mutex_t *wanted = &mtx1;

    (void)arg;

    printf("thread_routine d\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;

}

/*
 * Test driver: installs the hooks, starts the detector, then runs the four
 * test threads and waits for all of them.
 */
int main() {

    void *(*routines[4])(void *) = {
        thread_routine_a,
        thread_routine_b,
        thread_routine_c,
        thread_routine_d,
    };
    pthread_t tids[4];
    int t;

    init_hook();
    start_check();

    printf("start_check\n");

    for (t = 0; t < 4; t++) {
        pthread_create(&tids[t], NULL, routines[t], NULL);
    }

    for (t = 0; t < 4; t++) {
        pthread_join(tids[t], NULL);
    }

    return 0;

}

完整代码

#define _GNU_SOURCE
#include <dlfcn.h>

#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>

#if 1 // graph

#define MAX 100

/*
 * Record used both as the payload of a graph vertex and as an entry of the
 * lock table (thread_graph.locklist).
 */
typedef struct _node {

    uint64_t threadid; /* thread identifier; vertices are looked up by this field */
    uint64_t lockid;   /* mutex address stored as an integer; 0 marks a free lock-table slot */

    int degress;       /* counter incremented in lock_before and decremented in lock_after */

} node;

/*
 * Cell of a singly linked adjacency list. In thread_graph.list the array
 * element is the vertex itself; the cells chained through `next` are the
 * targets of its outgoing edges.
 */
typedef struct _vertex {

    node n;                 /* payload (matched on n.threadid) */
    struct _vertex *next;   /* next edge target, or NULL at the end of the list */


} vertex;

/* Global state of the detector: the thread graph plus the lock ownership table. */
typedef struct _thread_graph {

    vertex list[MAX]; // selfid -> thid: adjacency-list heads, recording which threads each thread points to
    int num;          // number of entries of list[] in use

    
    node locklist[MAX]; // which thread currently holds each mutex
    int lockidx;        // upper bound (exclusive) of the locklist[] slots that are scanned

} thread_graph;

thread_graph *tg = NULL; /* the single graph instance; allocated in start_check */
char visited[MAX];       /* per-vertex visit marks used by dfs / search_for_cycle */
int path[MAX];           /* vertex indices of the path currently being explored */
int k = 0;               /* number of entries of path[] in use */
int deadlock = 0;        /* set to 1 by dfs when it reports a cycle */


/*
 * Allocates a list cell holding a copy of n, with no successor.
 * Returns the new cell, or NULL when the allocation fails.
 * The caller owns the returned cell.
 */
vertex *create_vertex(node n) {

    vertex *cell = (vertex *)calloc(1, sizeof *cell);

    if (cell != NULL) {
        cell->n = n;
        cell->next = NULL;
    }

    return cell;
}

/*
 * Looks up the vertex whose threadid equals n.threadid.
 * Returns its index in tg->list, or -1 when no such vertex exists.
 */
int search_vertex(node n) {

    int pos = 0;

    while (pos < tg->num) {
        if (tg->list[pos].n.threadid == n.threadid) {
            return pos;
        }
        pos++;
    }

    return -1;

}

/*
 * Adds a vertex for n.threadid unless one already exists.
 * When the vertex table already holds MAX entries the vertex is not added
 * and a message is written to stderr, instead of writing past tg->list.
 */
void add_vertex(node n) {

    if (search_vertex(n) != -1) {
        return; /* already present */
    }

    if (tg->num >= MAX) {
        fprintf(stderr, "add_vertex: vertex table full (%d entries)\n", MAX);
        return;
    }

    tg->list[tg->num].n = n;
    tg->list[tg->num].next = NULL;
    tg->num ++;

}

/*
 * Appends an edge from -> to, creating either vertex first if necessary.
 * The edge is stored as a new cell at the tail of from's adjacency list.
 * If create_vertex returns NULL (allocation failure) no edge is appended.
 * Does nothing when the `from` vertex cannot be found after add_vertex.
 */
void add_edge(node from, node to) {

    add_vertex(from);
    add_vertex(to);

    /* Never index tg->list with -1. */
    int src = search_vertex(from);
    if (src < 0) {
        return;
    }

    vertex *vtx = &tg->list[src];

    while (vtx->next != NULL) {
        vtx = vtx->next;
    }

    vtx->next = create_vertex(to);

}

/*
 * Tests whether the edge from -> to is present.
 * Returns 1 when to.threadid appears in from's adjacency list, 0 otherwise
 * (including when `from` is not a vertex of the graph).
 */
int verify_edge(node from, node to) {

    int idx = search_vertex(from);
    if (idx == -1) {
        return 0;
    }

    /*
     * tg->list[idx] is the vertex itself, not an edge; the edges start at
     * its `next` cell. Starting the scan at the head would report a
     * self-edge as existing even when none was ever added.
     */
    vertex *vtx = tg->list[idx].next;

    while (vtx != NULL) {

        if (vtx->n.threadid == to.threadid) return 1;

        vtx = vtx->next;

    }

    return 0;

}

/*
 * Removes the first edge from -> to and frees its list cell.
 * Both vertices must exist in the graph.
 * Returns 1 when an edge was removed, 0 when there was nothing to remove.
 */
int remove_edge(node from, node to) {

    int idxi = search_vertex(from);
    int idxj = search_vertex(to);

    if (idxi == -1 || idxj == -1) {
        return 0;
    }

    vertex *v = &tg->list[idxi];

    while (v->next != NULL) {

        if (v->next->n.threadid == to.threadid) {

            /* Unlink the cell before releasing it. */
            vertex *temp = v->next;
            v->next = temp->next;

            free(temp);
            return 1;

        }

        v = v->next;
    }

    return 0;

}

/*
 * Prints the thread ids recorded in path[0..k-1] as
 * "deadlock : a --> b --> ... --> z".
 * Prints nothing when the path is empty.
 */
void print_deadlock(void) {

    int i = 0;

    if (k <= 0) {
        return;
    }

    printf("deadlock : ");
    for (i = 0;i < k-1; i ++) {

        /* threadid is uint64_t: print it as unsigned with a matching specifier */
        printf("%llu --> ", (unsigned long long)tg->list[path[i]].n.threadid);

    }

    printf("%llu\n", (unsigned long long)tg->list[path[i]].n.threadid);

}

/*
 * Depth-first walk from vertex idx that reports a cycle when it reaches a
 * vertex that is still on the current path.
 *
 * visited[] values: 0 = not seen, 1 = on the current path, 2 = fully explored.
 * Every call pushes exactly one entry onto path[]; the caller pops it (k--).
 *
 * Returns 0 when idx closes a cycle (deadlock is set to 1 and the path is
 * printed), 1 otherwise.
 *
 * NOTE(review): path[] has MAX entries; a path through all MAX vertices plus
 * the closing vertex would need one more slot - confirm MAX is large enough.
 */
int dfs(int idx) {

    vertex *vtx = &tg->list[idx];

    path[k++] = idx;

    if (visited[idx] == 1) {
        // reached a vertex that is still on the current path: deadlock
        deadlock = 1;

        print_deadlock();

        return 0;
    }

    if (visited[idx] == 2) {
        // already explored through another path: reaching it again is not a cycle
        return 1;
    }

    visited[idx] = 1;

    while (vtx->next) {

        int next_idx = search_vertex(vtx->next->n);
        if (next_idx >= 0) {
            dfs(next_idx);
            k--; // pop the entry pushed by the call above
        }

        vtx = vtx->next;

    }

    // leaving the path: only vertices marked 1 may close a cycle
    visited[idx] = 2;

    return 1;

}

int search_for_cycle(int idx) {

    vertex *vtx = &tg->list[idx];
    visited[idx] = 1;
    k = 0;
    path[k++] = idx;

    while (vtx->next != NULL) {

        int i = 0;
        for (i = 0;i < tg->num;i ++) {
            if (i == idx) continue;
            
            visited[i] = 0;
        }

        for (i = 1;i <= MAX;i ++) {
            path[i] = -1;
        }
        k = 1;

        dfs(search_vertex(vtx->next->n));
        vtx = vtx->next;
    }

}

#endif



/* Pointer to the next pthread_mutex_lock in lookup order; set by init_hook via dlsym(RTLD_NEXT, ...). */
typedef int (*pthread_mutex_lock_t)(pthread_mutex_t *mutex);
pthread_mutex_lock_t pthread_mutex_lock_f;

/* Pointer to the next pthread_mutex_unlock in lookup order; set by init_hook via dlsym(RTLD_NEXT, ...). */
typedef int (*pthread_mutex_unlock_t)(pthread_mutex_t *mutex);
pthread_mutex_unlock_t pthread_mutex_unlock_f;

/* Four mutexes; each test thread holds one and then requests the next (a: 1->2, b: 2->3, c: 3->4, d: 4->1). */
pthread_mutex_t mtx1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx2 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx3 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx4 = PTHREAD_MUTEX_INITIALIZER;


/*
 * Called before the real pthread_mutex_lock.
 * If lockid is present in the lock table, the calling thread is about to
 * wait for it: the entry's counter is incremented and an edge
 * threadid -> recorded holder is added unless it already exists.
 *
 * threadid: id of the calling thread.
 * lockid:   mutex address as an integer.
 */
void lock_before(uint64_t threadid, uint64_t lockid) {

    int idx = 0;
    for (idx = 0; idx < tg->lockidx; idx++) {

        if (lockid == tg->locklist[idx].lockid) {

            /* zero-initialise: the whole struct is copied into the graph */
            node from = {0};
            from.threadid = threadid;
            add_vertex(from);

            node to = {0};
            to.threadid = tg->locklist[idx].threadid;
            add_vertex(to);

            tg->locklist[idx].degress++; // inc

            if (!verify_edge(from, to)) {
                add_edge(from, to);
            }

        }

    }

}

/*
 * Finds the lock-table slot that records lockid.
 * Returns the slot index, or -1 when lockid is not in the table.
 */
int search_lock(uint64_t lockid) {

    int slot = 0;

    while (slot < tg->lockidx) {
        if (tg->locklist[slot].lockid == lockid) {
            return slot;
        }
        slot++;
    }

    return -1;

}

/*
 * Finds a lock-table slot that can take a new entry.
 * Returns the index of the first slot below tg->lockidx whose lockid is 0,
 * or tg->lockidx itself when there is none.
 * The lockid parameter is not used.
 */
int search_empty_lock(uint64_t lockid) {

    int slot = 0;

    while (slot < tg->lockidx && tg->locklist[slot].lockid != 0) {
        slot++;
    }

    /* either the first free slot, or tg->lockidx when the scan ran to the end */
    return slot;

}

void lock_after(uint64_t threadid, uint64_t lockid) {
    
    int idx = 0;
    if (-1 == (idx = search_lock(lockid))) {

        int emp_idx = search_empty_lock(lockid);
        tg->locklist[emp_idx].lockid = lockid;
        tg->locklist[emp_idx].threadid = threadid;

        tg->lockidx++; // inc

    } else {

        node from;
        from.threadid = threadid;
        add_vertex(from);

        node to;
        to.threadid = tg->locklist[idx].threadid;
        add_vertex(to);

        tg->locklist[idx].degress--; // inc

        if (!verify_edge(from, to)) {
            remove_edge(from, to);
            // 此处可以加上环的检测,可以实现死锁的预防
            // 如果有环,则remove_edge
        }

        tg->locklist[idx].threadid = threadid;

    }

}

/*
 * Called after the real pthread_mutex_unlock.
 * Clears the lock-table slot of lockid when its counter is 0; a slot with a
 * non-zero counter is left in place.
 * Does nothing when lockid is not in the table.
 *
 * threadid is not used.
 */
void unlock_after(uint64_t threadid, uint64_t lockid) {

    (void)threadid;

    int idx = search_lock(lockid);
    if (idx == -1) {
        return; /* untracked mutex: never index locklist with -1 */
    }

    if (tg->locklist[idx].degress == 0) {

        tg->locklist[idx].lockid = 0;
        tg->locklist[idx].threadid = 0;

    }

}


/*
 * Hooked pthread_mutex_lock: updates the thread graph before and after
 * forwarding to the real implementation through pthread_mutex_lock_f.
 * init_hook must have been called first, otherwise pthread_mutex_lock_f
 * is still NULL.
 *
 * Returns the result of the real pthread_mutex_lock.
 */
int pthread_mutex_lock(pthread_mutex_t *mutex) {

    pthread_t selfid = pthread_self();

    lock_before(selfid, (uint64_t)mutex);

    /* casts make the arguments match %lu and %p exactly */
    printf("pthread_mutex_lock selfid: %lu, mutex: %p\n", (unsigned long)selfid, (void *)mutex);

    int rc = pthread_mutex_lock_f(mutex);

    lock_after(selfid, (uint64_t)mutex);

    /* callers rely on the return value, so propagate it */
    return rc;

}

/*
 * Hooked pthread_mutex_unlock: forwards to the real implementation through
 * pthread_mutex_unlock_f and then updates the lock table.
 * init_hook must have been called first, otherwise pthread_mutex_unlock_f
 * is still NULL.
 *
 * Returns the result of the real pthread_mutex_unlock.
 */
int pthread_mutex_unlock(pthread_mutex_t *mutex) {

    pthread_t selfid = pthread_self();

    printf("pthread_mutex_unlock\n");

    int rc = pthread_mutex_unlock_f(mutex);

    unlock_after(selfid, (uint64_t)mutex);

    /* callers rely on the return value, so propagate it */
    return rc;

}



/*
 * Resolves the next definitions of pthread_mutex_lock / pthread_mutex_unlock
 * with dlsym(RTLD_NEXT, ...) and stores them in pthread_mutex_lock_f /
 * pthread_mutex_unlock_f.
 *
 * Returns 0 when both symbols were found, -1 otherwise.
 */
static int init_hook() {

    pthread_mutex_lock_f = (pthread_mutex_lock_t)dlsym(RTLD_NEXT, "pthread_mutex_lock");

    pthread_mutex_unlock_f = (pthread_mutex_unlock_t)dlsym(RTLD_NEXT, "pthread_mutex_unlock");

    if (pthread_mutex_lock_f == NULL || pthread_mutex_unlock_f == NULL) {
        return -1;
    }

    return 0;

}

/*
 * Runs one detection pass: clears the deadlock flag, searches for a cycle
 * from each vertex in turn until the flag is set, and prints
 * "no deadlock" when the flag is still clear afterwards.
 */
void check_dead_lock(void) {

    deadlock = 0;

    for (int v = 0; v < tg->num && deadlock != 1; v++) {
        search_for_cycle(v);
    }

    if (!deadlock) {
        printf("no deadlock\n");
    }

}

/*
 * Body of the detector thread: sleeps five seconds, runs check_dead_lock,
 * and repeats forever. args is unused.
 */
static void *thread_routine(void *args) {

    (void)args;

    for (;;) {
        sleep(5);
        check_dead_lock();
    }

    /* not reached */
    return NULL;

}

/*
 * Allocates the zero-filled global graph and starts the detector thread.
 * Terminates the process when the graph cannot be allocated, because the
 * lock hooks dereference tg unconditionally. A failure to start the
 * detector thread is reported on stderr.
 */
void start_check(void) {

    /* calloc leaves num, lockidx and every table slot at zero */
    tg = (thread_graph*)calloc(1, sizeof(thread_graph));
    if (tg == NULL) {
        perror("calloc");
        exit(EXIT_FAILURE);
    }

    pthread_t tid;

    int rc = pthread_create(&tid, NULL, thread_routine, NULL);
    if (rc != 0) {
        fprintf(stderr, "start_check: pthread_create failed (%d)\n", rc);
    }

}

/*
 * Test thread a: holds mtx1, sleeps one second, then requests mtx2.
 * Releases in reverse order. arg is unused; returns NULL.
 */
void *thread_routine_a(void *arg) {

    pthread_mutex_t *held = &mtx1;
    pthread_mutex_t *wanted = &mtx2;

    (void)arg;

    printf("thread_routine a\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;
}

/*
 * Test thread b: holds mtx2, sleeps one second, then requests mtx3.
 * Releases in reverse order. arg is unused; returns NULL.
 */
void *thread_routine_b(void *arg) {

    pthread_mutex_t *held = &mtx2;
    pthread_mutex_t *wanted = &mtx3;

    (void)arg;

    printf("thread_routine b\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;

}

/*
 * Test thread c: holds mtx3, sleeps one second, then requests mtx4.
 * Releases in reverse order. arg is unused; returns NULL.
 */
void *thread_routine_c(void *arg) {

    pthread_mutex_t *held = &mtx3;
    pthread_mutex_t *wanted = &mtx4;

    (void)arg;

    printf("thread_routine c\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;

}

/*
 * Test thread d: holds mtx4, sleeps one second, then requests mtx1.
 * Releases in reverse order. arg is unused; returns NULL.
 */
void *thread_routine_d(void *arg) {

    pthread_mutex_t *held = &mtx4;
    pthread_mutex_t *wanted = &mtx1;

    (void)arg;

    printf("thread_routine d\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;

}

// gcc -o deadlock_0 deadlock_0.c -lpthread -ldl

/*
 * Test driver: installs the hooks, starts the detector, then runs the four
 * test threads and waits for all of them.
 */
int main() {

    void *(*routines[4])(void *) = {
        thread_routine_a,
        thread_routine_b,
        thread_routine_c,
        thread_routine_d,
    };
    pthread_t tids[4];
    int t;

    init_hook();
    start_check();

    printf("start_check\n");

    for (t = 0; t < 4; t++) {
        pthread_create(&tids[t], NULL, routines[t], NULL);
    }

    for (t = 0; t < 4; t++) {
        pthread_join(tids[t], NULL);
    }

    return 0;

}

测试结果

image.png

 总结

死锁检测的实现,主要就是检测资源获取是否成环,主要考虑图的构建、环的检测、对lock/unlock进行hook以及加锁解锁过程中操作图的时机。

  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值