手写死锁检测组件

congchp

已于 2022-01-19 08:59:35 修改

阅读量376

点赞数 2

分类专栏： Linux server 文章标签：后端 c语言服务器

于 2021-12-29 11:14:12 首次发布

本文链接：https://blog.csdn.net/congchp/article/details/122211328

版权

Linux server 专栏收录该内容

45 篇文章 7 订阅

订阅专栏

死锁发生的原因

当一个进程/线程在等待永远不可能获取到的资源时，就会产生死锁。

死锁产生的四个必要条件：

资源互斥
每个进程占有资源并等待其他资源

系统不能剥夺进程资源
进程资源图是一个环路

如下图所示，线程A占有资源1，线程B占有资源2，线程C占有资源3。如果线程A要申请资源2，线程B要申请资源3，线程C要申请资源1，线程之间就形成了一个资源依赖的环路，就会发生死锁。

#include <pthread.h>
#include <unistd.h>

pthread_mutex_t mtx1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx2 = PTHREAD_MUTEX_INITIALIZER;

/*
 * Demo worker A: locks mtx1, sleeps one second, then locks mtx2.
 * thread_routine_b takes the same two mutexes in the opposite order, which
 * is the lock-order inversion this example is meant to illustrate.
 *
 * arg: unused.
 * Returns NULL.
 */
void *thread_routine_a(void *arg) {

    (void)arg;

    pthread_mutex_lock(&mtx1);
    sleep(1);   /* presumably lets the other thread take mtx2 first */

    pthread_mutex_lock(&mtx2);

    pthread_mutex_unlock(&mtx1);
    pthread_mutex_unlock(&mtx2);

    /* A start routine returns void *; falling off the end is not allowed. */
    return NULL;

}

/*
 * Demo worker B: locks mtx2, sleeps one second, then locks mtx1, i.e. the
 * reverse of the order used by thread_routine_a.
 *
 * arg: unused.
 * Returns NULL.
 */
void *thread_routine_b(void *arg) {

    (void)arg;

    pthread_mutex_lock(&mtx2);
    sleep(1);   /* presumably lets the other thread take mtx1 first */

    pthread_mutex_lock(&mtx1);

    pthread_mutex_unlock(&mtx2);
    pthread_mutex_unlock(&mtx1);

    /* A start routine returns void *; falling off the end is not allowed. */
    return NULL;

}


/* Demo entry point: start both workers, then wait for each of them. */
int main() {

    pthread_t worker_a;
    pthread_t worker_b;

    pthread_create(&worker_a, NULL, thread_routine_a, NULL);
    pthread_create(&worker_b, NULL, thread_routine_b, NULL);

    /* Join in creation order. */
    pthread_join(worker_a, NULL);
    pthread_join(worker_b, NULL);

}

死锁的解决方法

死锁产生后，解决措施是打破四大条件，有下列方法

死锁预防：采用某种策略限制并发进程对于资源的请求，破坏死锁产生的四个条件之一，使系统任何时刻都不满足死锁的条件。
死锁避免：一般采用银行家算法来避免，银行家算法，就是提前计算出一条不会死锁的资源分配方法，才分配资源，否则不分配资源，相当于借贷，考虑对方还得起才借钱。

死锁检测：允许死锁产生，但系统定时运行一个检测死锁的程序，若检测到系统中发生死锁，则设法加以解除。
死锁解除：即死锁发生后的解除方法，如强制剥夺资源，撤销进程等。

本文只讨论死锁检测的实现。

死锁检测的实现原理

通过检测是否存在资源依赖的环路，来检测是否存在死锁。资源依赖环采用有向图来存储。线程A申请线程B已占用的锁时，就在图中添加一条由线程A指向线程B的边。启动一个线程来检测图中是否有环的存在。

数据结构定义

/* Payload stored both in graph vertices and in the lock table. */
typedef struct _node {

    /* Thread identifier; the lock hooks pass pthread_self() here. */
    uint64_t threadid;
    /* Lock identifier; the lock hooks pass the mutex address here. */
    uint64_t lockid;

    /* Incremented in lock_before and decremented in lock_after; presumably
     * "degree", i.e. the number of threads waiting on the lock -- TODO confirm. */
    int degress;

} node;

/* One cell of a singly linked adjacency list. */
typedef struct _vertex {

    node n;                 /* payload of this cell */
    struct _vertex *next;   /* next cell in the list, or NULL at the end */


} vertex;

/* The thread graph together with the table of known locks. */
typedef struct _thread_graph {

    vertex list[MAX]; // selfid -> thid: adjacency lists recording which threads each thread points to
    int num;          // number of entries of list[] in use

    
    node locklist[MAX]; // mutex is locked by which thread: which thread currently holds each lock
    int lockidx;        // upper bound of the locklist[] slots scanned by the lookup helpers

} thread_graph;

图的操作方法及检测图中是否有环

/* Global detector state shared by all graph and hook functions. */
thread_graph *tg = NULL;    /* the graph; allocated by start_check or the test main */
char visited[MAX];          /* per-vertex mark used by dfs / search_for_cycle */
int path[MAX];              /* vertex indices of the path currently being explored */
int k = 0;                  /* number of entries of path[] in use */
int deadlock = 0;           /* set to 1 by dfs when it reaches an already-marked vertex */


/*
 * Allocate a detached list cell holding a copy of n.
 *
 * Returns the new cell with next == NULL, or NULL when allocation fails.
 * Cells are released with free() (see remove_edge).
 */
vertex *create_vertex(node n) {

    vertex *cell = (vertex *)calloc(1, sizeof(vertex));

    if (cell != NULL) {
        cell->n = n;
        cell->next = NULL;
    }

    return cell;
}

/*
 * Look up the vertex whose thread id equals n.threadid.
 *
 * Returns its index in tg->list, or -1 when no such vertex exists.
 * Only n.threadid is compared.
 */
int search_vertex(node n) {

    int pos = 0;

    while (pos < tg->num) {
        if (tg->list[pos].n.threadid == n.threadid) {
            return pos;
        }
        pos++;
    }

    return -1;

}

/*
 * Register n as a vertex of the graph unless a vertex with the same
 * thread id is already present.
 *
 * When all MAX slots are in use the vertex is not added and a diagnostic is
 * written to stderr; previously this wrote past the end of tg->list.
 */
void add_vertex(node n) {

    if (search_vertex(n) != -1) {
        return;     /* already known */
    }

    if (tg->num >= MAX) {
        fprintf(stderr, "add_vertex: vertex table full (%d entries)\n", MAX);
        return;
    }

    tg->list[tg->num].n = n;
    tg->list[tg->num].next = NULL;
    tg->num ++;

}

/*
 * Append a directed edge from -> to to the adjacency list of `from`.
 *
 * Both endpoints are registered as vertices first. The edge is appended at
 * the tail of the list without checking for duplicates (callers use
 * verify_edge for that). If either endpoint is not present after
 * registration, nothing is added, so a negative index is never used.
 * If the cell cannot be allocated the list is left unchanged.
 */
void add_edge(node from, node to) {

    add_vertex(from);
    add_vertex(to);

    int src = search_vertex(from);
    if (src == -1 || search_vertex(to) == -1) {
        return;
    }

    vertex *tail = &tg->list[src];

    while (tail->next != NULL) {
        tail = tail->next;
    }

    /* create_vertex returns NULL on failure, which keeps the list terminated. */
    tail->next = create_vertex(to);

}

/*
 * Report whether to.threadid appears in the adjacency list of `from`.
 *
 * Returns 1 if found, 0 otherwise (including when `from` is not a vertex).
 * The scan starts at the head cell, which holds `from` itself, so the
 * result is also 1 when from.threadid == to.threadid.
 */
int verify_edge(node from, node to) {

    int idx;
    vertex *cur;

    if (tg->num == 0) return 0;

    idx = search_vertex(from);
    if (idx == -1) return 0;

    for (cur = &(tg->list[idx]); cur != NULL; cur = cur->next) {
        if (cur->n.threadid == to.threadid) return 1;
    }

    return 0;

}

/*
 * Remove the first edge from -> to from the adjacency list of `from` and
 * free its cell.
 *
 * Returns 1 when an edge was removed, 0 when either endpoint is unknown or
 * no such edge exists. (The function used to fall off the end without
 * returning a value even though it is declared int.)
 */
int remove_edge(node from, node to) {

    int idxi = search_vertex(from);
    int idxj = search_vertex(to);

    if (idxi == -1 || idxj == -1) {
        return 0;
    }

    /* Walk with a pointer to the predecessor so the cell can be unlinked. */
    vertex *v = &tg->list[idxi];

    while (v->next != NULL) {

        if (v->next->n.threadid == to.threadid) {

            vertex *victim = v->next;
            v->next = victim->next;

            free(victim);
            return 1;

        }

        v = v->next;
    }

    return 0;

}

/*
 * Print the thread ids recorded in path[0..k-1] as
 * "deadlock : a --> b --> ... --> z".
 *
 * threadid is a uint64_t, so it is converted to unsigned long long and
 * printed with %llu; the previous %ld did not match the argument type.
 */
void print_deadlock(void) {

    int i = 0;

    printf("deadlock : ");
    for (i = 0;i < k-1; i ++) {

        printf("%llu --> ", (unsigned long long)tg->list[path[i]].n.threadid);

    }

    /* i now indexes the last recorded path entry. */
    printf("%llu\n", (unsigned long long)tg->list[path[i]].n.threadid);

}

int dfs(int idx) {

    vertex *vtx = &tg->list[idx];

    if (visited[idx] == 1) {
        // deadlock
        path[k++] = idx;
        deadlock = 1;

        print_deadlock();

        return 0;
    }

    visited[idx] = 1;
    path[k++] = idx;

    while (vtx->next) {

        dfs(search_vertex(vtx->next->n));
        k--;

        vtx = vtx->next;

    }

    return 1;

}

int search_for_cycle(int idx) {

    vertex *vtx = &tg->list[idx];
    visited[idx] = 1;
    k = 0;
    path[k++] = idx;

    while (vtx->next != NULL) {

        int i = 0;
        for (i = 0;i < tg->num;i ++) {
            if (i == idx) continue;
            
            visited[i] = 0;
        }

        for (i = 1;i <= MAX;i ++) {
            path[i] = -1;
        }
        k = 1;

        dfs(search_vertex(vtx->next->n));
        vtx = vtx->next;
    }

}

测试代码

#if 1 // graph cycle test
int main() {


	tg = (thread_graph*)malloc(sizeof(thread_graph));
	tg->num = 0;

	node v1;
	v1.threadid = 1;
	add_vertex(v1);

	node v2;
	v2.threadid = 2;
	add_vertex(v2);

	node v3;
	v3.threadid = 3;
	add_vertex(v3);

	node v4;
	v4.threadid = 4;
	add_vertex(v4);

	
	node v5;
	v5.threadid = 5;
	add_vertex(v5);


	add_edge(v1, v2);
	add_edge(v2, v3);
	add_edge(v3, v4);
	add_edge(v4, v5);
	add_edge(v3, v1);
	
	search_for_cycle(search_vertex(v1));

}
#endif

测试结果

对lock/unlock进行hook

怎样去构建图中的数据呢？就需要在lock、unlock的时候去维护图的结构，比较好的方法就是对pthread_mutex_lock和pthread_mutex_unlock这两个函数进行hook, 让程序中调用这两个函数的地方，都使用我们自己定义的函数。

/* Pointer to the real pthread_mutex_lock; filled in by init_hook via dlsym. */
typedef int (*pthread_mutex_lock_t)(pthread_mutex_t *mutex);
pthread_mutex_lock_t pthread_mutex_lock_f;

/* Pointer to the real pthread_mutex_unlock; filled in by init_hook via dlsym. */
typedef int (*pthread_mutex_unlock_t)(pthread_mutex_t *mutex);
pthread_mutex_unlock_t pthread_mutex_unlock_f;


/*
 * Replacement for pthread_mutex_lock that updates the thread graph around
 * the real call.
 *
 * lock_before runs before the real lock is attempted and lock_after runs
 * once it has returned. pthread_mutex_lock_f must have been set by
 * init_hook beforehand.
 *
 * Returns the result of the real pthread_mutex_lock (previously the result
 * was discarded and the function returned nothing).
 *
 * NOTE(review): the graph helpers called here touch shared globals without
 * any visible synchronization -- confirm whether that is acceptable.
 */
int pthread_mutex_lock(pthread_mutex_t *mutex) {

    pthread_t selfid = pthread_self();
    int rc;

    lock_before(selfid, (uint64_t)mutex);

    printf("pthread_mutex_lock selfid: %lu, mutex: %p\n", selfid, (void *)mutex);

    rc = pthread_mutex_lock_f(mutex);

    lock_after(selfid, (uint64_t)mutex);

    return rc;

}

/*
 * Replacement for pthread_mutex_unlock: calls the real function, then lets
 * unlock_after update the lock table.
 *
 * pthread_mutex_unlock_f must have been set by init_hook beforehand.
 *
 * Returns the result of the real pthread_mutex_unlock (previously the
 * result was discarded and the function returned nothing).
 */
int pthread_mutex_unlock(pthread_mutex_t *mutex) {

    pthread_t selfid = pthread_self();
    int rc;

    printf("pthread_mutex_unlock\n");

    rc = pthread_mutex_unlock_f(mutex);

    unlock_after(selfid, (uint64_t)mutex);

    return rc;

}

/*
 * Resolve the next definitions of pthread_mutex_lock / pthread_mutex_unlock
 * with dlsym(RTLD_NEXT, ...) and store them in the *_f function pointers.
 *
 * Returns 0 when both symbols were found, -1 otherwise (the function used
 * to return nothing and ignored lookup failures).
 */
static int init_hook(void) {

    pthread_mutex_lock_f = (pthread_mutex_lock_t)dlsym(RTLD_NEXT, "pthread_mutex_lock");

    pthread_mutex_unlock_f = (pthread_mutex_unlock_t)dlsym(RTLD_NEXT, "pthread_mutex_unlock");

    if (pthread_mutex_lock_f == NULL || pthread_mutex_unlock_f == NULL) {
        return -1;
    }

    return 0;

}

lock_before、lock_after和unlock_after

lock_before、lock_after和unlock_after在系统调用的前后对图进行操作, 它们是整个死锁检测实现的核心。

lock_before

lock_before, 就是进行获取锁前的操作，check 锁当前是否被占用，如果被占用，则需要添加一条边，由当前线程指向当前占有该锁的线程。

/*
 * Bookkeeping done before a thread tries to take a lock.
 *
 * If lockid is present in the lock table, its counter is incremented and an
 * edge threadid -> recorded owner is added unless verify_edge already
 * reports one. If the lock is not in the table nothing happens.
 *
 * The nodes are now zero-initialized before being copied into the graph
 * (their lockid/degress fields used to be indeterminate), and the scan
 * stops at the first match because lock_after only creates an entry for a
 * lock id that search_lock does not find.
 */
void lock_before(uint64_t threadid, uint64_t lockid) {

    int idx = 0;
    for (idx = 0; idx < tg->lockidx; idx++) {

        if (lockid != tg->locklist[idx].lockid) {
            continue;
        }

        node from = {0};
        from.threadid = threadid;
        add_vertex(from);

        node to = {0};
        to.threadid = tg->locklist[idx].threadid;
        add_vertex(to);

        tg->locklist[idx].degress++; // inc

        if (!verify_edge(from, to)) {
            add_edge(from, to);
            // Cycle detection could be added here to prevent deadlocks:
            // if the new edge closes a cycle, call remove_edge.
        }

        break;

    }

}

/*
 * Find the lock-table slot whose lockid equals the argument.
 *
 * Returns the slot index, or -1 when none of the first tg->lockidx slots
 * matches.
 */
int search_lock(uint64_t lockid) {

    int slot = 0;

    while (slot < tg->lockidx) {
        if (tg->locklist[slot].lockid == lockid) {
            return slot;
        }
        slot++;
    }

    return -1;

}

lock_after

lock_after, 就是获取锁成功后的操作。

需要check锁之前是否被其他线程占有。如果之前没有被其他线程占有，则找到合适位置来放锁的信息，并且需要将锁的占有者设置为当前线程；如果锁之前是被其他线程占有，则需要删除edge。


/*
 * Find a free lock-table slot, i.e. one whose lockid is 0.
 *
 * Returns the index of the first free slot below tg->lockidx, or
 * tg->lockidx itself when there is none. The lockid parameter is not used.
 */
int search_empty_lock(uint64_t lockid) {

    int slot;

    for (slot = 0; slot < tg->lockidx; slot++) {
        if (tg->locklist[slot].lockid == 0) {
            break;
        }
    }

    /* Either the free slot found above, or tg->lockidx after a full scan. */
    return slot;

}

void lock_after(uint64_t threadid, uint64_t lockid) {
    
    int idx = 0;
    if (-1 == (idx = search_lock(lockid))) {

        int emp_idx = search_empty_lock(lockid);
        tg->locklist[emp_idx].lockid = lockid;
        tg->locklist[emp_idx].threadid = threadid;

        tg->lockidx++; // inc

    } else {

        node from;
        from.threadid = threadid;
        add_vertex(from);

        node to;
        to.threadid = tg->locklist[idx].threadid;
        add_vertex(to);

        tg->locklist[idx].degress--; // inc

        if (!verify_edge(from, to)) {
            remove_edge(from, to);
        }

        tg->locklist[idx].threadid = threadid;

    }

}

unlock_after

unlock_after, 就是释放锁后的操作。需要将锁的信息清空。

/*
 * Bookkeeping done after a lock has been released: when the lock's counter
 * is 0, its table slot is cleared so it can be reused.
 *
 * An unlock of a lock that is not in the table is ignored; previously the
 * -1 returned by search_lock was used as an array index.
 * threadid is not used.
 */
void unlock_after(uint64_t threadid, uint64_t lockid) {
    
    int idx = search_lock(lockid);

    (void)threadid;

    if (idx == -1) {
        return;
    }

    if (tg->locklist[idx].degress == 0) {

        tg->locklist[idx].lockid = 0;
        tg->locklist[idx].threadid = 0;

    }

}

启动线程进行检测

/*
 * Run one detection pass: clear the deadlock flag, search for a cycle from
 * each vertex in turn until the flag is set, and print "no deadlock" when
 * it never is.
 */
void check_dead_lock(void) {

    int v;

    deadlock = 0;

    for (v = 0; v < tg->num && deadlock != 1; v++) {
        search_for_cycle(v);
    }

    if (!deadlock) {
        printf("no deadlock\n");
    }

}

/*
 * Body of the detector thread: sleep five seconds, run a detection pass,
 * repeat forever. args is unused.
 */
static void *thread_routine(void *args) {

    for (;;) {
        sleep(5);
        check_dead_lock();
    }

    /* Not reached; kept so the function has a return statement. */
    return NULL;

}

/*
 * Allocate the zero-filled global graph and start the detector thread.
 *
 * The process exits if the graph cannot be allocated, since the lock hooks
 * dereference tg unconditionally. A failure to start the detector thread is
 * reported on stderr. Both results used to be ignored.
 */
void start_check(void) {

    pthread_t tid;
    int rc;

    tg = (thread_graph*)calloc(1, sizeof(thread_graph));
    if (tg == NULL) {
        perror("calloc");
        exit(EXIT_FAILURE);
    }
    tg->num = 0;
    tg->lockidx = 0;

    rc = pthread_create(&tid, NULL, thread_routine, NULL);
    if (rc != 0) {
        fprintf(stderr, "start_check: pthread_create failed (%d)\n", rc);
    }

}

测试


/* The four mutexes locked by the test workers thread_routine_a..d. */
pthread_mutex_t mtx1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx2 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx3 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx4 = PTHREAD_MUTEX_INITIALIZER;

/*
 * Test worker A: holds mtx1 for one second, then also takes mtx2, and
 * releases both in reverse order of acquisition. Returns NULL.
 */
void *thread_routine_a(void *arg) {

    pthread_mutex_t *held = &mtx1;
    pthread_mutex_t *wanted = &mtx2;

    printf("thread_routine a\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;
}

/*
 * Test worker B: holds mtx2 for one second, then also takes mtx3, and
 * releases both in reverse order of acquisition. Returns NULL.
 */
void *thread_routine_b(void *arg) {

    pthread_mutex_t *held = &mtx2;
    pthread_mutex_t *wanted = &mtx3;

    printf("thread_routine b\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;

}

/*
 * Test worker C: holds mtx3 for one second, then also takes mtx4, and
 * releases both in reverse order of acquisition. Returns NULL.
 */
void *thread_routine_c(void *arg) {

    pthread_mutex_t *held = &mtx3;
    pthread_mutex_t *wanted = &mtx4;

    printf("thread_routine c\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;

}

/*
 * Test worker D: holds mtx4 for one second, then also takes mtx1, and
 * releases both in reverse order of acquisition. Returns NULL.
 */
void *thread_routine_d(void *arg) {

    pthread_mutex_t *held = &mtx4;
    pthread_mutex_t *wanted = &mtx1;

    printf("thread_routine d\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;

}

/*
 * Test entry point: install the hooks, start the detector, then run the
 * four workers and wait for all of them.
 */
int main() {

    enum { WORKER_COUNT = 4 };
    void *(*const workers[WORKER_COUNT])(void *) = {
        thread_routine_a,
        thread_routine_b,
        thread_routine_c,
        thread_routine_d,
    };
    pthread_t tids[WORKER_COUNT];
    int i;

    init_hook();
    start_check();

    printf("start_check\n");

    /* Start every worker before joining any of them. */
    for (i = 0; i < WORKER_COUNT; i++) {
        pthread_create(&tids[i], NULL, workers[i], NULL);
    }

    for (i = 0; i < WORKER_COUNT; i++) {
        pthread_join(tids[i], NULL);
    }

    return 0;

}

完整代码

#define _GNU_SOURCE
#include <dlfcn.h>

#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>

#if 1 // graph

/* Capacity of the vertex table, the lock table, visited[] and path[]. */
#define MAX 100

/* Payload stored both in graph vertices and in the lock table. */
typedef struct _node {

    /* Thread identifier; the lock hooks pass pthread_self() here. */
    uint64_t threadid;
    /* Lock identifier; the lock hooks pass the mutex address here. */
    uint64_t lockid;

    /* Incremented in lock_before and decremented in lock_after; presumably
     * "degree", i.e. the number of threads waiting on the lock -- TODO confirm. */
    int degress;

} node;

/* One cell of a singly linked adjacency list. */
typedef struct _vertex {

    node n;                 /* payload of this cell */
    struct _vertex *next;   /* next cell in the list, or NULL at the end */


} vertex;

/* The thread graph together with the table of known locks. */
typedef struct _thread_graph {

    vertex list[MAX]; // selfid -> thid: adjacency lists recording which threads each thread points to
    int num;          // number of entries of list[] in use

    
    node locklist[MAX]; // mutex is locked by which thread: which thread currently holds each lock
    int lockidx;        // upper bound of the locklist[] slots scanned by the lookup helpers

} thread_graph;

/* Global detector state shared by all graph and hook functions. */
thread_graph *tg = NULL;    /* the graph; allocated by start_check */
char visited[MAX];          /* per-vertex mark used by dfs / search_for_cycle */
int path[MAX];              /* vertex indices of the path currently being explored */
int k = 0;                  /* number of entries of path[] in use */
int deadlock = 0;           /* set to 1 by dfs when it reaches an already-marked vertex */


/*
 * Allocate a detached list cell holding a copy of n.
 *
 * Returns the new cell with next == NULL, or NULL when allocation fails.
 * Cells are released with free() (see remove_edge).
 */
vertex *create_vertex(node n) {

    vertex *cell = (vertex *)calloc(1, sizeof(vertex));

    if (cell != NULL) {
        cell->n = n;
        cell->next = NULL;
    }

    return cell;
}

/*
 * Look up the vertex whose thread id equals n.threadid.
 *
 * Returns its index in tg->list, or -1 when no such vertex exists.
 * Only n.threadid is compared.
 */
int search_vertex(node n) {

    int pos = 0;

    while (pos < tg->num) {
        if (tg->list[pos].n.threadid == n.threadid) {
            return pos;
        }
        pos++;
    }

    return -1;

}

/*
 * Register n as a vertex of the graph unless a vertex with the same
 * thread id is already present.
 *
 * When all MAX slots are in use the vertex is not added and a diagnostic is
 * written to stderr; previously this wrote past the end of tg->list.
 */
void add_vertex(node n) {

    if (search_vertex(n) != -1) {
        return;     /* already known */
    }

    if (tg->num >= MAX) {
        fprintf(stderr, "add_vertex: vertex table full (%d entries)\n", MAX);
        return;
    }

    tg->list[tg->num].n = n;
    tg->list[tg->num].next = NULL;
    tg->num ++;

}

/*
 * Append a directed edge from -> to to the adjacency list of `from`.
 *
 * Both endpoints are registered as vertices first. The edge is appended at
 * the tail of the list without checking for duplicates (callers use
 * verify_edge for that). If either endpoint is not present after
 * registration, nothing is added, so a negative index is never used.
 * If the cell cannot be allocated the list is left unchanged.
 */
void add_edge(node from, node to) {

    add_vertex(from);
    add_vertex(to);

    int src = search_vertex(from);
    if (src == -1 || search_vertex(to) == -1) {
        return;
    }

    vertex *tail = &tg->list[src];

    while (tail->next != NULL) {
        tail = tail->next;
    }

    /* create_vertex returns NULL on failure, which keeps the list terminated. */
    tail->next = create_vertex(to);

}

/*
 * Report whether to.threadid appears in the adjacency list of `from`.
 *
 * Returns 1 if found, 0 otherwise (including when `from` is not a vertex).
 * The scan starts at the head cell, which holds `from` itself, so the
 * result is also 1 when from.threadid == to.threadid.
 */
int verify_edge(node from, node to) {

    int idx;
    vertex *cur;

    if (tg->num == 0) return 0;

    idx = search_vertex(from);
    if (idx == -1) return 0;

    for (cur = &(tg->list[idx]); cur != NULL; cur = cur->next) {
        if (cur->n.threadid == to.threadid) return 1;
    }

    return 0;

}

/*
 * Remove the first edge from -> to from the adjacency list of `from` and
 * free its cell.
 *
 * Returns 1 when an edge was removed, 0 when either endpoint is unknown or
 * no such edge exists. (The function used to fall off the end without
 * returning a value even though it is declared int.)
 */
int remove_edge(node from, node to) {

    int idxi = search_vertex(from);
    int idxj = search_vertex(to);

    if (idxi == -1 || idxj == -1) {
        return 0;
    }

    /* Walk with a pointer to the predecessor so the cell can be unlinked. */
    vertex *v = &tg->list[idxi];

    while (v->next != NULL) {

        if (v->next->n.threadid == to.threadid) {

            vertex *victim = v->next;
            v->next = victim->next;

            free(victim);
            return 1;

        }

        v = v->next;
    }

    return 0;

}

/*
 * Print the thread ids recorded in path[0..k-1] as
 * "deadlock : a --> b --> ... --> z".
 *
 * threadid is a uint64_t, so it is converted to unsigned long long and
 * printed with %llu; the previous %ld did not match the argument type.
 */
void print_deadlock(void) {

    int i = 0;

    printf("deadlock : ");
    for (i = 0;i < k-1; i ++) {

        printf("%llu --> ", (unsigned long long)tg->list[path[i]].n.threadid);

    }

    /* i now indexes the last recorded path entry. */
    printf("%llu\n", (unsigned long long)tg->list[path[i]].n.threadid);

}

int dfs(int idx) {

    vertex *vtx = &tg->list[idx];

    if (visited[idx] == 1) {
        // deadlock
        path[k++] = idx;
        deadlock = 1;

        print_deadlock();

        return 0;
    }

    visited[idx] = 1;
    path[k++] = idx;

    while (vtx->next) {

        dfs(search_vertex(vtx->next->n));
        k--;

        vtx = vtx->next;

    }

    return 1;

}

int search_for_cycle(int idx) {

    vertex *vtx = &tg->list[idx];
    visited[idx] = 1;
    k = 0;
    path[k++] = idx;

    while (vtx->next != NULL) {

        int i = 0;
        for (i = 0;i < tg->num;i ++) {
            if (i == idx) continue;
            
            visited[i] = 0;
        }

        for (i = 1;i <= MAX;i ++) {
            path[i] = -1;
        }
        k = 1;

        dfs(search_vertex(vtx->next->n));
        vtx = vtx->next;
    }

}

#endif



/* Pointer to the real pthread_mutex_lock; filled in by init_hook via dlsym. */
typedef int (*pthread_mutex_lock_t)(pthread_mutex_t *mutex);
pthread_mutex_lock_t pthread_mutex_lock_f;

/* Pointer to the real pthread_mutex_unlock; filled in by init_hook via dlsym. */
typedef int (*pthread_mutex_unlock_t)(pthread_mutex_t *mutex);
pthread_mutex_unlock_t pthread_mutex_unlock_f;

/* The four mutexes locked by the test workers thread_routine_a..d. */
pthread_mutex_t mtx1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx2 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx3 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx4 = PTHREAD_MUTEX_INITIALIZER;


/*
 * Bookkeeping done before a thread tries to take a lock.
 *
 * If lockid is present in the lock table, its counter is incremented and an
 * edge threadid -> recorded owner is added unless verify_edge already
 * reports one. If the lock is not in the table nothing happens.
 *
 * The nodes are now zero-initialized before being copied into the graph
 * (their lockid/degress fields used to be indeterminate), and the scan
 * stops at the first match because lock_after only creates an entry for a
 * lock id that search_lock does not find.
 */
void lock_before(uint64_t threadid, uint64_t lockid) {

    int idx = 0;
    for (idx = 0; idx < tg->lockidx; idx++) {

        if (lockid != tg->locklist[idx].lockid) {
            continue;
        }

        node from = {0};
        from.threadid = threadid;
        add_vertex(from);

        node to = {0};
        to.threadid = tg->locklist[idx].threadid;
        add_vertex(to);

        tg->locklist[idx].degress++; // inc

        if (!verify_edge(from, to)) {
            add_edge(from, to);
            // Cycle detection could be added here to prevent deadlocks:
            // if the new edge closes a cycle, call remove_edge.
        }

        break;

    }

}

/*
 * Find the lock-table slot whose lockid equals the argument.
 *
 * Returns the slot index, or -1 when none of the first tg->lockidx slots
 * matches.
 */
int search_lock(uint64_t lockid) {

    int slot = 0;

    while (slot < tg->lockidx) {
        if (tg->locklist[slot].lockid == lockid) {
            return slot;
        }
        slot++;
    }

    return -1;

}

/*
 * Find a free lock-table slot, i.e. one whose lockid is 0.
 *
 * Returns the index of the first free slot below tg->lockidx, or
 * tg->lockidx itself when there is none. The lockid parameter is not used.
 */
int search_empty_lock(uint64_t lockid) {

    int slot;

    for (slot = 0; slot < tg->lockidx; slot++) {
        if (tg->locklist[slot].lockid == 0) {
            break;
        }
    }

    /* Either the free slot found above, or tg->lockidx after a full scan. */
    return slot;

}

void lock_after(uint64_t threadid, uint64_t lockid) {
    
    int idx = 0;
    if (-1 == (idx = search_lock(lockid))) {

        int emp_idx = search_empty_lock(lockid);
        tg->locklist[emp_idx].lockid = lockid;
        tg->locklist[emp_idx].threadid = threadid;

        tg->lockidx++; // inc

    } else {

        node from;
        from.threadid = threadid;
        add_vertex(from);

        node to;
        to.threadid = tg->locklist[idx].threadid;
        add_vertex(to);

        tg->locklist[idx].degress--; // inc

        if (!verify_edge(from, to)) {
            remove_edge(from, to);
            // 此处可以加上环的检测，可以实现死锁的预防
            // 如果有环，则remove_edge
        }

        tg->locklist[idx].threadid = threadid;

    }

}

/*
 * Bookkeeping done after a lock has been released: when the lock's counter
 * is 0, its table slot is cleared so it can be reused.
 *
 * An unlock of a lock that is not in the table is ignored; previously the
 * -1 returned by search_lock was used as an array index.
 * threadid is not used.
 */
void unlock_after(uint64_t threadid, uint64_t lockid) {
    
    int idx = search_lock(lockid);

    (void)threadid;

    if (idx == -1) {
        return;
    }

    if (tg->locklist[idx].degress == 0) {

        tg->locklist[idx].lockid = 0;
        tg->locklist[idx].threadid = 0;

    }

}


/*
 * Replacement for pthread_mutex_lock that updates the thread graph around
 * the real call.
 *
 * lock_before runs before the real lock is attempted and lock_after runs
 * once it has returned. pthread_mutex_lock_f must have been set by
 * init_hook beforehand.
 *
 * Returns the result of the real pthread_mutex_lock (previously the result
 * was discarded and the function returned nothing).
 *
 * NOTE(review): the graph helpers called here touch shared globals without
 * any visible synchronization -- confirm whether that is acceptable.
 */
int pthread_mutex_lock(pthread_mutex_t *mutex) {

    pthread_t selfid = pthread_self();
    int rc;

    lock_before(selfid, (uint64_t)mutex);

    printf("pthread_mutex_lock selfid: %lu, mutex: %p\n", selfid, (void *)mutex);

    rc = pthread_mutex_lock_f(mutex);

    lock_after(selfid, (uint64_t)mutex);

    return rc;

}

/*
 * Replacement for pthread_mutex_unlock: calls the real function, then lets
 * unlock_after update the lock table.
 *
 * pthread_mutex_unlock_f must have been set by init_hook beforehand.
 *
 * Returns the result of the real pthread_mutex_unlock (previously the
 * result was discarded and the function returned nothing).
 */
int pthread_mutex_unlock(pthread_mutex_t *mutex) {

    pthread_t selfid = pthread_self();
    int rc;

    printf("pthread_mutex_unlock\n");

    rc = pthread_mutex_unlock_f(mutex);

    unlock_after(selfid, (uint64_t)mutex);

    return rc;

}



/*
 * Resolve the next definitions of pthread_mutex_lock / pthread_mutex_unlock
 * with dlsym(RTLD_NEXT, ...) and store them in the *_f function pointers.
 *
 * Returns 0 when both symbols were found, -1 otherwise (the function used
 * to return nothing and ignored lookup failures).
 */
static int init_hook(void) {

    pthread_mutex_lock_f = (pthread_mutex_lock_t)dlsym(RTLD_NEXT, "pthread_mutex_lock");

    pthread_mutex_unlock_f = (pthread_mutex_unlock_t)dlsym(RTLD_NEXT, "pthread_mutex_unlock");

    if (pthread_mutex_lock_f == NULL || pthread_mutex_unlock_f == NULL) {
        return -1;
    }

    return 0;

}

/*
 * Run one detection pass: clear the deadlock flag, search for a cycle from
 * each vertex in turn until the flag is set, and print "no deadlock" when
 * it never is.
 */
void check_dead_lock(void) {

    int v;

    deadlock = 0;

    for (v = 0; v < tg->num && deadlock != 1; v++) {
        search_for_cycle(v);
    }

    if (!deadlock) {
        printf("no deadlock\n");
    }

}

/*
 * Body of the detector thread: sleep five seconds, run a detection pass,
 * repeat forever. args is unused.
 */
static void *thread_routine(void *args) {

    for (;;) {
        sleep(5);
        check_dead_lock();
    }

    /* Not reached; kept so the function has a return statement. */
    return NULL;

}

/*
 * Allocate the zero-filled global graph and start the detector thread.
 *
 * The process exits if the graph cannot be allocated, since the lock hooks
 * dereference tg unconditionally. A failure to start the detector thread is
 * reported on stderr. Both results used to be ignored.
 */
void start_check(void) {

    pthread_t tid;
    int rc;

    tg = (thread_graph*)calloc(1, sizeof(thread_graph));
    if (tg == NULL) {
        perror("calloc");
        exit(EXIT_FAILURE);
    }
    tg->num = 0;
    tg->lockidx = 0;

    rc = pthread_create(&tid, NULL, thread_routine, NULL);
    if (rc != 0) {
        fprintf(stderr, "start_check: pthread_create failed (%d)\n", rc);
    }

}

/*
 * Test worker A: holds mtx1 for one second, then also takes mtx2, and
 * releases both in reverse order of acquisition. Returns NULL.
 */
void *thread_routine_a(void *arg) {

    pthread_mutex_t *held = &mtx1;
    pthread_mutex_t *wanted = &mtx2;

    printf("thread_routine a\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;
}

/*
 * Test worker B: holds mtx2 for one second, then also takes mtx3, and
 * releases both in reverse order of acquisition. Returns NULL.
 */
void *thread_routine_b(void *arg) {

    pthread_mutex_t *held = &mtx2;
    pthread_mutex_t *wanted = &mtx3;

    printf("thread_routine b\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;

}

/*
 * Test worker C: holds mtx3 for one second, then also takes mtx4, and
 * releases both in reverse order of acquisition. Returns NULL.
 */
void *thread_routine_c(void *arg) {

    pthread_mutex_t *held = &mtx3;
    pthread_mutex_t *wanted = &mtx4;

    printf("thread_routine c\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;

}

/*
 * Test worker D: holds mtx4 for one second, then also takes mtx1, and
 * releases both in reverse order of acquisition. Returns NULL.
 */
void *thread_routine_d(void *arg) {

    pthread_mutex_t *held = &mtx4;
    pthread_mutex_t *wanted = &mtx1;

    printf("thread_routine d\n");

    pthread_mutex_lock(held);
    sleep(1);
    pthread_mutex_lock(wanted);

    pthread_mutex_unlock(wanted);
    pthread_mutex_unlock(held);

    return NULL;

}

// gcc -o deadlock_0 deadlock_0.c -lpthread -ldl

/*
 * Test entry point: install the hooks, start the detector, then run the
 * four workers and wait for all of them.
 */
int main() {

    enum { WORKER_COUNT = 4 };
    void *(*const workers[WORKER_COUNT])(void *) = {
        thread_routine_a,
        thread_routine_b,
        thread_routine_c,
        thread_routine_d,
    };
    pthread_t tids[WORKER_COUNT];
    int i;

    init_hook();
    start_check();

    printf("start_check\n");

    /* Start every worker before joining any of them. */
    for (i = 0; i < WORKER_COUNT; i++) {
        pthread_create(&tids[i], NULL, workers[i], NULL);
    }

    for (i = 0; i < WORKER_COUNT; i++) {
        pthread_join(tids[i], NULL);
    }

    return 0;

}

测试结果

总结

死锁检测的实现，主要就是检测资源获取是否成环，主要考虑图的构建、环的检测、对lock/unlock进行hook以及加锁解锁过程中操作图的时机。

congchp

关注

2
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
手写死锁检测组件

死锁发生的原因当一个进程/线程在等待永远不可能获取到的资源时，就会产生死锁。死锁产生的四个必要条件：资源互斥每个进程占有资源并等待其他资源系统不能剥夺进程资源进程资源图是一个环路如下图所示，线程A占有资源1，线程B占有资源2，线程C占有资源3。如果线程A要申请资源2，线程B要申请资源3，线程C要申请资源1，进程之间就形成了一个资源依赖的环路，就会发生死锁。#include <pthread.h>#include <unistd.h>..
复制链接

扫一扫

专栏目录