使用有向邻接表检测死锁

最新推荐文章于 2024-02-21 13:45:48 发布

他过江

最新推荐文章于 2024-02-21 13:45:48 发布

阅读量714

点赞数

分类专栏：基础组件、Linux C++后台　文章标签：数据结构、死锁检测、有向邻接表

本文链接：https://blog.csdn.net/et_endeavoring/article/details/122158838

版权

Linux C++后台同时被 2 个专栏收录

13 篇文章 2 订阅

订阅专栏

基础组件

1 篇文章 0 订阅

订阅专栏

死锁是什么

两个或多个进程/线程在等待对方占用的资源，结果都无法往下继续执行。
在这里插入图片描述

死锁的模拟

启动四个线程t1，t2，t3，t4，各自锁上资源A，B，C，D，然后t1锁B，t2锁C，t3锁D，t4锁A。
在这里插入图片描述

死锁的检测

由上图可以看出，线程对资源的请求形成了一个环路，所以判断是否存在死锁可以转化为判断是否有资源环路。这里将互斥量作为竞争的资源。
这里数据结构使用有向邻接表。

在将死锁检测转换为有向邻接表判断有环前，先介绍一下邻接表。

邻接表

当边的数量相对于顶点较少时（稀疏图），邻接矩阵会浪费很多存储空间，所以这里使用邻接表。
在这里插入图片描述
然后用DFS判断有向邻接表是否有环。

将死锁检测转换为有向邻接表判断有环

邻接表顶点数组里存储的是线程资源：包括线程ID。
线程t1想要对已经被t2占用的互斥量mtx2加锁时，就添加一条t1->t2的弧到t1的弧链表。线程t1获取到mtx2时（此时t2已经解锁）则删除这条弧。
然后新开一个线程定时检测邻接表是否有环，有则将环打印出来。
根据上面的流程提出三个原语操作：加锁前，加锁后，解锁后。

加锁前
上锁前如果有其它线程在占用锁，则添加1条弧
加锁后
加锁后如果锁数组没有mtx，则添加
如果有mtx，说明之前一直阻塞等待锁，其它线程刚刚释放锁，本线程获取锁，删除弧，修改锁线程ID
解锁后
解锁后如果mtx没有线程在等待，则从锁数组移除

hook

用hook来在加锁，解锁调用前后添加自定义代码。

// Function-pointer type matching the signature of pthread_mutex_lock / pthread_mutex_unlock.
typedef int (*pthread_mutex_lock_symbol)(pthread_mutex_t *mtx);
// Pointers that init_hook() fills in with the results of dlsym(RTLD_NEXT, ...).
pthread_mutex_lock_symbol pthread_mutex_lock_s; 
pthread_mutex_lock_symbol pthread_mutex_unlock_s; 

/**
 * Look up "pthread_mutex_lock" and "pthread_mutex_unlock" with
 * dlsym(RTLD_NEXT, ...) and store the results in pthread_mutex_lock_s and
 * pthread_mutex_unlock_s. A failed lookup is only reported with printf.
 */
void init_hook() {
    pthread_mutex_lock_s = dlsym(RTLD_NEXT, "pthread_mutex_lock");
    if (NULL == pthread_mutex_lock_s) {
        const char *why = dlerror();
        printf("dlsym failed: %s\n", why);
    }

    pthread_mutex_unlock_s = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
    if (NULL == pthread_mutex_unlock_s) {
        const char *why = dlerror();
        printf("dlsym 2 failed: %s\n", why);
    }
}

/**
 * Interposed pthread_mutex_lock.
 *
 * Logs the call, runs do_before_lock(), forwards to the function stored in
 * pthread_mutex_lock_s, then runs do_after_lock().
 *
 * @param mtx mutex being locked
 * @return whatever the forwarded call returned
 */
int pthread_mutex_lock(pthread_mutex_t *mtx) {
    // %p expects void *, %lu expects unsigned long: cast explicitly.
    printf("lock %p, thread %lu\n", (void *)mtx, (unsigned long)pthread_self());
    do_before_lock(mtx);
    int rc = pthread_mutex_lock_s(mtx);
    do_after_lock(mtx);
    // Callers rely on the result; falling off the end of a non-void function is undefined.
    return rc;
}

/**
 * Interposed pthread_mutex_unlock.
 *
 * Logs the call, forwards to the function stored in pthread_mutex_unlock_s,
 * then runs do_after_unlock().
 *
 * @param mtx mutex being unlocked
 * @return whatever the forwarded call returned
 */
int pthread_mutex_unlock(pthread_mutex_t *mtx) {
    // %p expects void *, %lu expects unsigned long: cast explicitly.
    printf("unlock %p, thread %lu\n", (void *)mtx, (unsigned long)pthread_self());
    int rc = pthread_mutex_unlock_s(mtx);
    do_after_unlock(mtx);
    // Callers rely on the result; falling off the end of a non-void function is undefined.
    return rc;
}

三个锁原语操作

/**
 * Pre-lock hook: if mtx is already present in the lock table, record that the
 * calling thread waits for the thread stored in that table entry.
 *
 * Both threads are added as vertices, an arc caller -> recorded thread is
 * added unless it already exists, and the entry's use_count is incremented.
 * When mtx is not in the table nothing is recorded.
 *
 * @param mtx mutex the caller is about to lock
 */
void do_before_lock(pthread_mutex_t *mtx) {
    int lindex = get_lock_data_index(mtx);
    if (-1 == lindex) {
        return;
    }

    // Zero-initialise: these structs are copied by value into the graph, so
    // lock_addr / use_count must not be left indeterminate.
    data_t head = {0};
    head.tid = pthread_self();
    add_vertex(head);

    data_t tail = {0};
    tail.tid = al.ldarr[lindex].tid;
    add_vertex(tail);

    if (!is_arc_exist(head, tail)) {
        add_arc(head, tail);

        // TODO: a cycle check could be run right here to report a deadlock immediately
    }

    ++al.ldarr[lindex].use_count;
}


/** 
 * 加锁后如果锁数组没有mtx，则添加
 * 如果有mtx，说明之前一直阻塞等待锁，其它线程刚刚释放锁，本线程获取锁，删除弧，修改锁线程ID
 */
void do_after_lock(pthread_mutex_t *mtx) {
    int lindex = get_lock_data_index(mtx);
    if (-1 == lindex) {
        data_t tail;
        tail.tid = pthread_self();
        tail.lock_addr = mtx;
        tail.use_count = 0;
        al.ldarr[get_unused_lock_data_index()] = tail;
    }
    else {
        pthread_t tid = pthread_self();
        int vindex = find_vertex(tid);
        if (-1 == vindex)
        {
            return;
        }
        data_t head = al.varr[vindex].d;
        data_t tail = al.ldarr[lindex];
        if (is_arc_exist(head, tail)) {
            remove_arc(head, tail);
        }
        --al.ldarr[lindex].use_count;
        // 忘了
        al.ldarr[lindex].tid = tid;
    }
}

/**
 * Post-unlock hook: when mtx is in the lock table and its use_count is 0,
 * release the table slot by clearing lock_addr and tid.
 *
 * @param mtx mutex that has just been unlocked
 */
void do_after_unlock(pthread_mutex_t *mtx) {
    const int slot = get_lock_data_index(mtx);
    if (slot != -1 && al.ldarr[slot].use_count == 0) {
        al.ldarr[slot].lock_addr = NULL;
        al.ldarr[slot].tid = 0;
    }
}

启动线程定时检测环

/**
 * Thread entry point of the checker: every 3 seconds run checkGraphCycle()
 * on each vertex in turn, stopping the pass as soon as has_deadlock is 1.
 * The loop never terminates.
 *
 * @param arg unused
 */
void* checkDeadlockRoutine(void *arg) {
    for (;;) {
        sleep(3);
        int v = 0;
        while (v < al.vnum && 1 != has_deadlock) {
            checkGraphCycle(v);
            ++v;
        }
    }
    return NULL;
}

/**
 * Look for a cycle that starts at vertex idx.
 *
 * Resets the global path so it contains only idx and marks idx visited. For
 * every outgoing arc of idx it clears the visited flag of all other vertices
 * and runs DFS() from that arc; DFS() reports a cycle when it reaches a
 * vertex that is already marked.
 *
 * @param idx index into al.varr; out-of-range values are ignored
 */
void checkGraphCycle(int idx) {
    if (idx < 0 || idx >= al.vnum) {
        return;
    }

    k = 0;
    path[k++] = idx;

    visited[idx] = 1;

    // Walk the arc list of idx itself.
    vertex_t *p = al.varr[idx].next;
    while (p) {
        int i;
        for (i = 0; i < al.vnum; ++i) {
            if (i != idx) {
                visited[i] = 0;
            }
        }

        DFS(p);
        // Pop the entry DFS pushed for this arc.
        --k;

        p = p->next;
    }
}

/**
 * Depth-first step of the cycle search.
 *
 * Resolves the arc node p to its vertex index, pushes that index onto the
 * global path, and reports a deadlock (sets has_deadlock, prints the path)
 * if the vertex was already marked visited. Otherwise marks it and recurses
 * into each of its arcs. The caller pops the pushed path entry.
 *
 * @param p arc node whose thread ID identifies the vertex to visit
 */
void DFS(vertex_t *p) {
    const int cur = find_vertex(p->d.tid);
    if (cur == -1) {
        return;
    }

    path[k] = cur;
    k++;

    if (visited[cur] == 1) {
        has_deadlock = 1;
        printf("存在死锁\n");
        printCycle();
        return;
    }
    visited[cur] = 1;

    // A thread can block in at most one pthread_mutex_lock call at a time, so
    // each thread is expected to have at most one outgoing arc.
    for (vertex_t *arc = al.varr[cur].next; arc != NULL; arc = arc->next) {
        DFS(arc);
        --k;
    }
}

完整代码

/**
 * 死锁检测 -> 有向图成环
 * 死锁的模拟
 * HOOK, 三个原语操作：锁前，锁后，解锁后
 * 有向邻接表的基本操作：添加/删除/查找顶点，添加/删除弧，判断弧是否存在
 * 启动线程定时检测是否有环，DFS
 */

// long long是8个字节
// 32位 linux，指针和long是4字节
// 64位 linux，指针和long是8字节

#define _GNU_SOURCE // 启用RTLD_NEXT
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <dlfcn.h>

// Capacity of the vertex array, the lock table, and the visited/path arrays.
#define MAX_VERTEX      100

typedef struct vertex_s vertex_t;
typedef struct data_s data_t;
typedef struct adjlist_s adjlist_t;
// Function-pointer type matching the signature of pthread_mutex_lock / pthread_mutex_unlock.
typedef int (*pthread_mutex_lock_symbol)(pthread_mutex_t *mtx);

// Resource record; stored in thread vertices, arc nodes and lock-table entries.
struct data_s {
    pthread_t tid;          // thread ID
    void *lock_addr;        // mutex address; filled in for lock-table (ldarr) entries
    int use_count;          // lock reference count; ++ in do_before_lock, -- in do_after_lock
};

/**
 * Vertex nodes and arc nodes share this one struct.
 * Entries of the vertex array represent threads.
 * Nodes in an arc list represent the resource (lock) being waited for; the
 * code stores the thread ID recorded for that lock in d.tid.
 */
struct vertex_s {
    data_t      d;
    vertex_t    *next;      // next arc node; for a vertex-array entry, the head of its arc list
};

// Directed adjacency list, plus the table of tracked locks.
struct adjlist_s {
    vertex_t    varr[MAX_VERTEX];       // vertex array (threads)
    int         vnum;                   // number of vertices in use
    data_t      ldarr[MAX_VERTEX];      // locked resources (one entry per tracked mutex)
    int         ldindex;                // upper bound of the range scanned by get_lock_data_index

    // TODO: both arrays need to be protected by a lock
#if 0
    pthread_mutex_t mtx;
#endif
};


// The single global graph and lock table, zero-initialised.
adjlist_t al = {0};
// Whether a vertex has been visited during the current search
int visited[MAX_VERTEX] = {0};
// Set to 1 by DFS once a cycle has been found
int has_deadlock = 0;
// Vertex indices along the path being explored (the reported deadlock path)
int path[MAX_VERTEX] = {0};
// Largest used index of path, plus 1
int k = 0;

// Pointers that init_hook() fills in with the results of dlsym(RTLD_NEXT, ...).
pthread_mutex_lock_symbol pthread_mutex_lock_s; 
pthread_mutex_lock_symbol pthread_mutex_unlock_s; 

// Linear search of the vertex array for a thread ID.
// Returns the index of the matching vertex, or -1 if tid has no vertex.
int find_vertex(pthread_t tid) {
    int pos = 0;
    while (pos < al.vnum) {
        if (tid == al.varr[pos].d.tid) {
            return pos;
        }
        ++pos;
    }
    return -1;
}

// Append a vertex for d.tid to the vertex array unless one already exists.
// When the array is full the vertex is not added and a message is printed.
void add_vertex(data_t d) {
    if (-1 != find_vertex(d.tid)) {
        return;
    }

    // Guard against writing past the end of al.varr.
    const int capacity = (int)(sizeof al.varr / sizeof al.varr[0]);
    if (al.vnum >= capacity) {
        printf("add_vertex: vertex array is full\n");
        return;
    }

    al.varr[al.vnum].d = d;
    al.varr[al.vnum].next = NULL;
    ++al.vnum;
}

// Allocate a list node holding a copy of d.
// Returns the new node (next == NULL), or NULL after printing a message when
// the allocation fails. The node is released with free() in remove_arc.
vertex_t* create_vertex(data_t d) {
    vertex_t *node = calloc(1, sizeof *node);
    if (NULL == node) {
        printf("calloc failed\n");
        return NULL;
    }
    node->next = NULL;
    node->d = d;
    return node;
}

// Add an arc head -> tail: make sure both are vertices, then append a new
// node for tail at the end of head's arc list.
void add_arc(data_t head, data_t tail) {
    add_vertex(head);
    add_vertex(tail);

    const int from = find_vertex(head.tid);
    if (from == -1) {
        return;
    }

    // Walk to the terminating NULL link and hang the new node there.
    vertex_t **link = &al.varr[from].next;
    while (*link != NULL) {
        link = &(*link)->next;
    }
    *link = create_vertex(tail);
}

// Remove the arc head -> tail: unlink and free the first node in head's arc
// list whose thread ID equals tail.tid. Prints a message and does nothing if
// either thread has no vertex.
void remove_arc(data_t head, data_t tail) {
    const int from = find_vertex(head.tid);
    const int to = find_vertex(tail.tid);
    if (from == -1 || to == -1) {
        printf("-1 == hindex || -1 == tindex\n");
        return;
    }

    for (vertex_t **link = &al.varr[from].next; *link != NULL; link = &(*link)->next) {
        vertex_t *node = *link;
        if (node->d.tid == tail.tid) {
            *link = node->next;
            free(node);
            return;
        }
    }
}

// Test whether an arc head -> tail exists.
// Returns 1 if head's arc list contains a node with tail.tid, otherwise 0
// (also 0 when head has no vertex).
int is_arc_exist(data_t head, data_t tail) {
    const int from = find_vertex(head.tid);
    if (from == -1) {
        return 0;
    }

    for (const vertex_t *arc = al.varr[from].next; arc != NULL; arc = arc->next) {
        if (arc->d.tid == tail.tid) {
            return 1;
        }
    }
    return 0;
}

/**
 * Look up "pthread_mutex_lock" and "pthread_mutex_unlock" with
 * dlsym(RTLD_NEXT, ...) and store the results in pthread_mutex_lock_s and
 * pthread_mutex_unlock_s. A failed lookup is only reported with printf.
 */
void init_hook() {
    pthread_mutex_lock_s = dlsym(RTLD_NEXT, "pthread_mutex_lock");
    if (NULL == pthread_mutex_lock_s) {
        const char *why = dlerror();
        printf("dlsym failed: %s\n", why);
    }

    pthread_mutex_unlock_s = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
    if (NULL == pthread_mutex_unlock_s) {
        const char *why = dlerror();
        printf("dlsym 2 failed: %s\n", why);
    }
}

/**
 * Atomically add `add` to *value.
 *
 * Uses the compiler's atomic fetch-and-add builtin instead of hand-written
 * x86 `lock xadd` assembly, so it builds on any target and lets the compiler
 * know that *value is modified.
 *
 * @param value integer to update
 * @param add   amount to add
 * @return the value *value held before the addition
 */
int inc(int *value, int add) {
	return __atomic_fetch_add(value, add, __ATOMIC_SEQ_CST);
}

// Find mtx in the lock table.
// Scans al.ldarr[0 .. al.ldindex-1] and returns the index of the entry whose
// lock_addr equals mtx, or -1 if there is none.
int get_lock_data_index(pthread_mutex_t *mtx) {
    int slot = 0;
    while (slot < al.ldindex) {
        if (mtx == al.ldarr[slot].lock_addr) {
            return slot;
        }
        ++slot;
    }
    return -1;
}

// Find the lowest free slot (lock_addr == NULL) in the lock table.
//
// al.ldindex is kept as a high-water mark: one past the highest slot ever
// handed out. Incrementing it on every allocation (including reuse of a freed
// slot) would make it grow without bound and eventually push the scan in
// get_lock_data_index past the end of al.ldarr.
//
// Returns the slot index, or MAX_VERTEX when the table is full; callers must
// check for that before indexing al.ldarr.
int get_unused_lock_data_index() {
    int i;
    for (i = 0; i < MAX_VERTEX; ++i) {
        if (NULL == al.ldarr[i].lock_addr) {
            // Atomic "ldindex = max(ldindex, i + 1)", so a concurrent caller
            // can never move the mark backwards.
            int seen = __atomic_load_n(&al.ldindex, __ATOMIC_SEQ_CST);
            while (seen < i + 1 &&
                   !__atomic_compare_exchange_n(&al.ldindex, &seen, i + 1, 0,
                                                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
                // seen was refreshed by the failed exchange; retry
            }
            return i;
        }
    }
    return MAX_VERTEX;
}

/**
 * Pre-lock hook: if mtx is already present in the lock table, record that the
 * calling thread waits for the thread stored in that table entry.
 *
 * Both threads are added as vertices, an arc caller -> recorded thread is
 * added unless it already exists, and the entry's use_count is incremented.
 * When mtx is not in the table nothing is recorded.
 *
 * @param mtx mutex the caller is about to lock
 */
void do_before_lock(pthread_mutex_t *mtx) {
    int lindex = get_lock_data_index(mtx);
    if (-1 == lindex) {
        return;
    }

    // Zero-initialise: these structs are copied by value into the graph, so
    // lock_addr / use_count must not be left indeterminate.
    data_t head = {0};
    head.tid = pthread_self();
    add_vertex(head);

    data_t tail = {0};
    tail.tid = al.ldarr[lindex].tid;
    add_vertex(tail);

    if (!is_arc_exist(head, tail)) {
        add_arc(head, tail);

        // TODO: a cycle check could be run right here to report a deadlock immediately
    }

    ++al.ldarr[lindex].use_count;
}

/** 
 * 加锁后如果锁数组没有mtx，则添加
 * 如果有mtx，说明之前一直阻塞等待锁，其它线程刚刚释放锁，本线程获取锁，删除弧，修改锁线程ID
 */
void do_after_lock(pthread_mutex_t *mtx) {
    int lindex = get_lock_data_index(mtx);
    if (-1 == lindex) {
        data_t tail;
        tail.tid = pthread_self();
        tail.lock_addr = mtx;
        tail.use_count = 0;
        al.ldarr[get_unused_lock_data_index()] = tail;
    }
    else {
        pthread_t tid = pthread_self();
        int vindex = find_vertex(tid);
        if (-1 == vindex)
        {
            return;
        }
        data_t head = al.varr[vindex].d;
        data_t tail = al.ldarr[lindex];
        if (is_arc_exist(head, tail)) {
            remove_arc(head, tail);
        }
        --al.ldarr[lindex].use_count;
        // 忘了
        al.ldarr[lindex].tid = tid;
    }
}

/**
 * Post-unlock hook: when mtx is in the lock table and its use_count is 0,
 * release the table slot by clearing lock_addr and tid.
 *
 * @param mtx mutex that has just been unlocked
 */
void do_after_unlock(pthread_mutex_t *mtx) {
    const int slot = get_lock_data_index(mtx);
    if (slot != -1 && al.ldarr[slot].use_count == 0) {
        al.ldarr[slot].lock_addr = NULL;
        al.ldarr[slot].tid = 0;
    }
}

/**
 * Interposed pthread_mutex_lock.
 *
 * Logs the call, runs do_before_lock(), forwards to the function stored in
 * pthread_mutex_lock_s, then runs do_after_lock().
 *
 * @param mtx mutex being locked
 * @return whatever the forwarded call returned
 */
int pthread_mutex_lock(pthread_mutex_t *mtx) {
    // %p expects void *, %lu expects unsigned long: cast explicitly.
    printf("lock %p, thread %lu\n", (void *)mtx, (unsigned long)pthread_self());
    do_before_lock(mtx);
    int rc = pthread_mutex_lock_s(mtx);
    do_after_lock(mtx);
    // Callers rely on the result; falling off the end of a non-void function is undefined.
    return rc;
}

/**
 * Interposed pthread_mutex_unlock.
 *
 * Logs the call, forwards to the function stored in pthread_mutex_unlock_s,
 * then runs do_after_unlock().
 *
 * @param mtx mutex being unlocked
 * @return whatever the forwarded call returned
 */
int pthread_mutex_unlock(pthread_mutex_t *mtx) {
    // %p expects void *, %lu expects unsigned long: cast explicitly.
    printf("unlock %p, thread %lu\n", (void *)mtx, (unsigned long)pthread_self());
    int rc = pthread_mutex_unlock_s(mtx);
    do_after_unlock(mtx);
    // Callers rely on the result; falling off the end of a non-void function is undefined.
    return rc;
}

/**
 * Print the thread IDs of the vertices recorded in path[0 .. k-1] on one
 * line, joined by " ->". Prints nothing when the path is empty.
 */
void printCycle() {
    if (k < 1) {
        return;
    }

    const int last = k - 1;
    int pos = 0;
    while (pos < last) {
        printf("thread %lu ->", al.varr[path[pos]].d.tid);
        ++pos;
    }
    printf("thread %lu\n", al.varr[path[last]].d.tid);
}

/**
 * Depth-first step of the cycle search.
 *
 * Resolves the arc node p to its vertex index, pushes that index onto the
 * global path, and reports a deadlock (sets has_deadlock, prints the path)
 * if the vertex was already marked visited. Otherwise marks it and recurses
 * into each of its arcs. The caller pops the pushed path entry.
 *
 * @param p arc node whose thread ID identifies the vertex to visit
 */
void DFS(vertex_t *p) {
    const int cur = find_vertex(p->d.tid);
    if (cur == -1) {
        return;
    }

    path[k] = cur;
    k++;

    if (visited[cur] == 1) {
        has_deadlock = 1;
        printf("存在死锁\n");
        printCycle();
        return;
    }
    visited[cur] = 1;

    // A thread can block in at most one pthread_mutex_lock call at a time, so
    // each thread is expected to have at most one outgoing arc.
    for (vertex_t *arc = al.varr[cur].next; arc != NULL; arc = arc->next) {
        DFS(arc);
        --k;
    }
}

/**
 * Look for a cycle that starts at vertex idx.
 *
 * Resets the global path so it contains only idx and marks idx visited. For
 * every outgoing arc of idx it clears the visited flag of all other vertices
 * and runs DFS() from that arc; DFS() reports a cycle when it reaches a
 * vertex that is already marked.
 *
 * @param idx index into al.varr; out-of-range values are ignored
 */
void checkGraphCycle(int idx) {
    if (idx < 0 || idx >= al.vnum) {
        return;
    }

    k = 0;
    path[k++] = idx;

    visited[idx] = 1;

    // Walk the arc list of idx itself.
    vertex_t *p = al.varr[idx].next;
    while (p) {
        int i;
        for (i = 0; i < al.vnum; ++i) {
            if (i != idx) {
                visited[i] = 0;
            }
        }

        DFS(p);
        // Pop the entry DFS pushed for this arc.
        --k;

        p = p->next;
    }
}

/**
 * Thread entry point of the checker: every 3 seconds run checkGraphCycle()
 * on each vertex in turn, stopping the pass as soon as has_deadlock is 1.
 * The loop never terminates.
 *
 * @param arg unused
 */
void* checkDeadlockRoutine(void *arg) {
    for (;;) {
        sleep(3);
        int v = 0;
        while (v < al.vnum && 1 != has_deadlock) {
            checkGraphCycle(v);
            ++v;
        }
    }
    return NULL;
}

/**
 * Start the background thread that runs checkDeadlockRoutine().
 *
 * The thread is detached because it loops forever and nobody joins it. A
 * failure to create the thread is reported on stderr.
 */
void checkDeadlock() {
    pthread_t tid;
    int rc = pthread_create(&tid, NULL, checkDeadlockRoutine, NULL);
    if (0 != rc) {
        fprintf(stderr, "checkDeadlock: pthread_create failed: %d\n", rc);
        return;
    }
    // Never joined, so detach it to let the system reclaim its resources.
    pthread_detach(tid);
}

// Demo mutexes: worker N locks mtxN first, then the next one (worker4 wraps
// around to mtx1), which forms the lock cycle.
static pthread_mutex_t mtx1 = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mtx2 = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mtx3 = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mtx4 = PTHREAD_MUTEX_INITIALIZER;

/**
 * Demo thread 1: locks mtx1, sleeps one second, then locks mtx2 before
 * releasing both.
 *
 * @param arg unused
 * @return NULL
 */
void* worker1(void *arg) {
    (void)arg;
    printf("thread1 %lu\n", (unsigned long)pthread_self());
    pthread_mutex_lock(&mtx1);
    // Give all four threads time to take their first lock
    sleep(1);
    pthread_mutex_lock(&mtx2);

    pthread_mutex_unlock(&mtx2);
    pthread_mutex_unlock(&mtx1);
    // A thread start routine must return a value.
    return NULL;
}

/**
 * Demo thread 2: locks mtx2, sleeps one second, then locks mtx3 before
 * releasing both.
 *
 * @param arg unused
 * @return NULL
 */
void* worker2(void *arg) {
    (void)arg;
    printf("thread2 %lu\n", (unsigned long)pthread_self());
    pthread_mutex_lock(&mtx2);
    sleep(1);
    pthread_mutex_lock(&mtx3);

    pthread_mutex_unlock(&mtx3);
    pthread_mutex_unlock(&mtx2);
    // A thread start routine must return a value.
    return NULL;
}

/**
 * Demo thread 3: locks mtx3, sleeps one second, then locks mtx4 before
 * releasing both.
 *
 * @param arg unused
 * @return NULL
 */
void* worker3(void *arg) {
    (void)arg;
    printf("thread3 %lu\n", (unsigned long)pthread_self());
    pthread_mutex_lock(&mtx3);
    sleep(1);
    pthread_mutex_lock(&mtx4);

    pthread_mutex_unlock(&mtx4);
    pthread_mutex_unlock(&mtx3);
    // A thread start routine must return a value.
    return NULL;
}

/**
 * Demo thread 4: locks mtx4, sleeps one second, then locks mtx1 before
 * releasing both.
 *
 * @param arg unused
 * @return NULL
 */
void* worker4(void *arg) {
    (void)arg;
    printf("thread4 %lu\n", (unsigned long)pthread_self());
    pthread_mutex_lock(&mtx4);
    sleep(1);
    pthread_mutex_lock(&mtx1);

    pthread_mutex_unlock(&mtx1);
    pthread_mutex_unlock(&mtx4);
    // A thread start routine must return a value.
    return NULL;
}

/**
 * Demo driver: install the hooks, start the checker thread, then start the
 * four workers and wait for them.
 *
 * @return 0 if every worker was started, 1 if a pthread_create call failed
 */
int main(void) {
    init_hook();
    checkDeadlock();

    void *(*const entry[])(void *) = { worker1, worker2, worker3, worker4 };
    enum { WORKER_COUNT = sizeof entry / sizeof entry[0] };

    pthread_t tids[WORKER_COUNT];
    int started = 0;
    int exit_code = 0;

    for (; started < WORKER_COUNT; ++started) {
        int rc = pthread_create(&tids[started], NULL, entry[started], NULL);
        if (0 != rc) {
            fprintf(stderr, "pthread_create failed: %d\n", rc);
            exit_code = 1;
            break;
        }
    }

    // Join only the threads that were actually created.
    for (int i = 0; i < started; ++i) {
        pthread_join(tids[i], NULL);
    }

    return exit_code;
}