手撕死锁检测

最新推荐文章于 2024-05-28 11:13:49 发布

张同学的成长记录

最新推荐文章于 2024-05-28 11:13:49 发布

阅读量971

点赞数 27

文章标签：开发语言 linux c++ c语言数据结构

本文链接：https://blog.csdn.net/weixin_45634782/article/details/139089241

版权

1. 引言

在现代软件开发中，多线程编程是一种常用的技术，它允许程序同时执行多个任务，提高效率和响应速度。然而，随着线程数量的增加，线程间的资源共享就可能导致复杂的同步问题，其中最严重的问题之一就是死锁。死锁发生时，多个线程相互等待对方释放资源，导致它们都无法继续执行，从而使程序挂起。

理解和检测死锁对于开发稳定和可靠的软件系统极为重要。只有通过有效的死锁检测和管理，开发者才能确保软件系统能够在高并发情况下正常运行，避免因死锁造成的服务中断或系统崩溃，保证用户体验和系统的持续可用性。

2. 死锁的基本概念及形成条件

定义

死锁是一种特定的程序状态，其中涉及的每个线程都在等待另一个已被其他线程持有的资源，由此形成了一个无尽的等待循环，没有任何线程能够继续执行。

形成条件

死锁的形成通常需要满足以下四个条件：

互斥条件：至少有一个资源必须处于非共享模式，即一次只能由一个线程使用。
持有并等待条件：至少有一个线程持有一个资源，并且正在等待获取一个由其他线程持有的额外资源。
非抢占条件：资源只能由持有它的线程释放，不能被操作系统或其他线程强制抢占。
循环等待条件：涉及的每个线程都在等待一个资源，该资源由另一个相互等待的线程持有，形成一个闭合的循环依赖链。

例子

考虑一个简单的例子，两个线程A和B。线程A持有资源1并请求资源2，而线程B持有资源2并请求资源1。如果没有适当的预防或检测机制，这种情况将导致死锁。

下面结合完整代码，详细介绍死锁检测组件各部分的设计与实现，帮助读者理解其工作原理：

3. 死锁检测组件的设计和实现

关系表 (`rela_table`)

关系表 rela_table 是一个数组，用于跟踪当前哪个线程持有哪个互斥锁。每个元素包含一个互斥锁的指针和一个线程ID。以下是操作关系表的关键函数：

search_rela_table(pthread_mutex_t *mtx)：遍历 rela_table 查找给定互斥锁，返回持有该锁的线程ID。如果没有找到，返回0。
add_rela_table(pthread_mutex_t *mtx, pthread_t tid)：在表中第一个空闲位置记录一个新的锁和线程关系。成功返回0；如果表已满，则返回-1（该函数不检查关系是否已存在）。
del_rela_table(pthread_mutex_t *mtx, pthread_t tid)：从表中删除特定的锁和线程关系，用于锁释放后清理状态。

资源分配图 (`task_graph`)

资源分配图通过顶点和边的动态列表维护线程和资源的关系。每个顶点代表一个进程或资源，边表示请求关系。以下是核心函数：

create_vertex(struct source_type type)：基于提供的资源或进程类型创建一个新的顶点。
add_vertex(struct source_type type) 和 add_edge(struct source_type from, struct source_type to)：在图中添加顶点和边，用于表示线程请求锁的操作。
search_vertex(struct source_type type) 和 remove_edge(struct source_type from, struct source_type to)：搜索特定顶点和移除边，用于锁释放时更新图状态。

死锁检测算法

使用深度优先搜索（DFS）在资源分配图中寻找环路。以下是关键的DFS实现和周期性检测函数：
- DFS(int idx)：从指定顶点开始深度优先搜索，用于探测环。
- search_for_cycle(int idx)：以顶点 idx 为起点，对它的每条出边分别启动一次DFS，检查图中是否存在循环依赖，即死锁；check_dead_lock 会对图中的每个顶点依次调用它。

完整代码

/* Capacity shared by the ownership table, the graph's vertex array and the DFS bookkeeping arrays. */
#define MAX		100
/* Integer type used for vertex and lock identifiers.
 * NOTE(review): `unsigned long` is only 64 bits wide on LP64 platforms - confirm the target. */
typedef unsigned long int uint64;

/* One ownership record: pairs a mutex with the thread recorded as holding it. */
struct rela_node_s {
	/* Address of the mutex; NULL in a free slot. */
	pthread_mutex_t *mtx;
	/* Thread recorded for that mutex; 0 in a free slot. */
	pthread_t thid;
};

/* Fixed-size ownership table, zero-initialised so every slot starts out free. */
struct rela_node_s rela_table[MAX] = {0};

pthread_t search_rela_table(pthread_mutex_t *mtx) {
	int i = 0;
	for (i = 0;i < MAX;i ++) {
		if (mtx == rela_table[i].mtx) {
			return rela_table[i].thid;
		}
	}
	return 0;
} 

int del_rela_table(pthread_mutex_t *mtx, pthread_t tid) {
	int i = 0;
	for (i = 0;i < MAX;i ++) {
		if ((mtx == rela_table[i].mtx) && (tid == rela_table[i].thid)) {
			rela_table[i].mtx = NULL;
			rela_table[i].thid = 0;
			return 0;
		}
	
	}

	return -1;
}

int add_rela_table(pthread_mutex_t *mtx, pthread_t tid) {
	int i = 0;
	for (i = 0;i < MAX;i ++) {

		if ((rela_table[i].mtx == NULL) && (rela_table[i].thid == 0)) {
			rela_table[i].mtx = mtx;
			rela_table[i].thid = tid;
			return 0;
		}
	}
	return -1;
}

/* Kind of entity a graph vertex stands for. */
enum Type {PROCESS, RESOURCE};

/* Payload of a graph vertex; search_vertex identifies a vertex by (type, id). */
struct source_type {

	/* Identifier of the entity; the lock hooks store a thread id here. */
	uint64 id;
	/* Whether the entity is a PROCESS or a RESOURCE. */
	enum Type type;
	/* NOTE(review): not referenced by the code shown - presumably the lock's address; confirm. */
	uint64 lock_id;
	/* NOTE(review): not referenced by the code shown - presumably a waiter count ("degrees"); confirm. */
	int degress;
};

/*
 * Adjacency-list node. Entries of task_graph.list are the vertices themselves;
 * the nodes chained through `next` are heap-allocated copies of the vertices
 * they point to (see add_edge / create_vertex).
 */
struct vertex {
	/* Identity of the vertex this node refers to. */
	struct source_type s;
	/* Next outgoing edge of the owning vertex, or NULL at the end of the chain. */
	struct vertex *next;
};

/* Graph of threads and the edges between them, stored as adjacency lists. */
struct task_graph {
	/* Vertex slots; each slot's `next` chain holds that vertex's outgoing edges. */
	struct vertex list[MAX];
	/* Number of slots of `list` currently in use. */
	int num;
	/* NOTE(review): not referenced by the code shown - purpose to be confirmed. */
	struct source_type locklist[MAX];
	/* NOTE(review): not referenced by the code shown - presumably the fill level of locklist; confirm. */
	int lockidx;
	/* NOTE(review): never initialised or locked in the code shown - confirm intended use. */
	pthread_mutex_t mutex;
};

/* The single global graph; allocated by init_graph. */
struct task_graph *tg = NULL;
/* Vertex indices along the DFS path currently being explored; printed by print_deadlock. */
int path[MAX+1];
/* visited[i] is 1 once the current search has entered vertex i. */
int visited[MAX];
/* Number of valid entries in `path`. */
int k = 0;
/* Set to 1 by DFS when it reaches an already-visited vertex; not reset by the code shown. */
int deadlock = 0;

/*
 * Allocate a detached adjacency-list node carrying a copy of `type`.
 *
 * Returns the new node with `next` set to NULL, or NULL when the allocation
 * fails. The caller owns the node and is responsible for freeing it.
 */
struct vertex *create_vertex(struct source_type type) {
	struct vertex *tex = malloc(sizeof *tex);

	if (tex == NULL) {
		return NULL;
	}

	tex->s = type;
	tex->next = NULL;
	return tex;
}


int search_vertex(struct source_type type) {
	int i = 0;
	for (i = 0;i < tg->num;i ++) {

		if (tg->list[i].s.type == type.type && tg->list[i].s.id == type.id) {
			return i;
		}
	}
	return -1;
}

void add_vertex(struct source_type type) {
	if (search_vertex(type) == -1) {

		tg->list[tg->num].s = type;
		tg->list[tg->num].next = NULL;
		tg->num ++;

	}

}


/*
 * Add a directed edge from `from` to `to`, creating either vertex on demand.
 *
 * The new edge is appended to the end of `from`'s adjacency list.
 * Returns 0 on success, -1 when either vertex is missing from the graph
 * after the attempt to add it, or when the edge node cannot be allocated.
 */
int add_edge(struct source_type from, struct source_type to) {

	add_vertex(from);
	add_vertex(to);

	int idx = search_vertex(from);
	/* An edge to a vertex that is not in the graph would break later lookups. */
	if (idx == -1 || search_vertex(to) == -1) {
		return -1;
	}

	struct vertex *edge = create_vertex(to);
	if (edge == NULL) {
		return -1;
	}

	struct vertex *v = &(tg->list[idx]);
	while (v->next != NULL) {
		v = v->next;
	}
	v->next = edge;

	return 0;
}


/*
 * Report whether vertex `i` has an outgoing edge to a vertex with `j`'s id.
 *
 * Only the edge nodes chained after `i`'s slot are examined; the slot itself
 * is the vertex, not an edge, so it is skipped. Edges are matched by id only,
 * the same way remove_edge matches them.
 * Returns 1 when such an edge exists, 0 otherwise (including when `i` is not
 * in the graph).
 */
int verify_edge(struct source_type i, struct source_type j) {

	int idx = search_vertex(i);
	if (idx == -1) {
		return 0;
	}

	struct vertex *edge = tg->list[idx].next;

	while (edge != NULL) {
		if (edge->s.id == j.id) return 1;
		edge = edge->next;
	}
	return 0;

}


/*
 * Remove the first edge from `from` whose target id equals `to.id` and free
 * its node.
 *
 * Returns 0 when an edge was removed, -1 when either vertex is not in the
 * graph or no matching edge exists.
 */
int remove_edge(struct source_type from, struct source_type to) {

	int idxi = search_vertex(from);
	int idxj = search_vertex(to);

	if (idxi == -1 || idxj == -1) {
		return -1;
	}

	struct vertex *v = &tg->list[idxi];
	while (v->next != NULL) {
		if (v->next->s.id == to.id) {
			/* Unlink the node before releasing it. */
			struct vertex *victim = v->next;
			v->next = victim->next;
			free(victim);
			return 0;
		}
		v = v->next;
	}

	return -1;
}


/*
 * Print the vertex ids stored in path[0 .. k-1] as "a --> b --> ... --> z".
 *
 * The ids are `unsigned long` (uint64), so they are printed with %lu.
 */
void print_deadlock(void) {
	int i = 0;
	printf("cycle : ");
	for (i = 0;i < k-1;i ++) {
		printf("%lu --> ", tg->list[path[i]].s.id);
	}
	/* i now indexes the last entry of the path (or entry 0 when k <= 1). */
	printf("%lu\n", tg->list[path[i]].s.id);

}

/*
 * Depth-first walk from vertex `idx`, recording the route in `path`.
 *
 * Reaching a vertex already marked in `visited` is treated as a cycle: the
 * vertex is appended to the path, the path is printed, `deadlock` is set and
 * 0 is returned. Otherwise the vertex is marked, every outgoing edge is
 * followed in list order, and 1 is returned.
 */
int DFS(int idx) {
	struct vertex *edge;

	if (visited[idx] == 1) {
		path[k++] = idx;
		print_deadlock();
		deadlock = 1;
		return 0;
	}

	visited[idx] = 1;
	path[k++] = idx;

	for (edge = tg->list[idx].next; edge != NULL; edge = edge->next) {
		DFS(search_vertex(edge->s));
		/* Drop the entry the recursive call pushed onto the path. */
		k --;
	}

	return 1;
}


/*
 * Run one DFS per outgoing edge of vertex `idx`, looking for a cycle.
 *
 * Before each DFS the visited marks of all other vertices are cleared and the
 * path is reset so that it holds only `idx`. Findings are reported by DFS
 * itself (printed path and the global `deadlock` flag).
 * Always returns 0.
 */
int search_for_cycle(int idx) {
	struct vertex *ver = &tg->list[idx];
	visited[idx] = 1;
	k = 0;
	path[k++] = idx;

	while (ver->next != NULL) {
		int i = 0;
		/* Only the start vertex stays marked between the per-edge searches. */
		for (i = 0;i < tg->num;i ++) {
			if (i == idx) continue;
			visited[i] = 0;
		}
		/* path[0] keeps the start vertex; everything after it is cleared. */
		for (i = 1;i <= MAX;i ++) {
			path[i] = -1;
		}
		k = 1;

		DFS(search_vertex(ver->next->s));
		ver = ver->next;
	}

	return 0;
}

/*
 * Allocate the global graph `tg` with every field zeroed.
 *
 * Returns 0 on success, -1 when the allocation fails (tg is then NULL).
 */
int init_graph(void) {
	/* calloc so the vertex slots, lock list and counters start out zeroed. */
	tg = calloc(1, sizeof *tg);
	if (tg == NULL) {
		return -1;
	}

	tg->num = 0;
	return 0;
}





### 4. 实际应用和测试

#### 测试场景
创建了一个模拟环境，其中四个线程（t1, t2, t3, t4）尝试按顺序获取和释放四个互斥锁（mtx1, mtx2, mtx3, mtx4），但设计为故意构成死锁：

```c
/*
 * Test thread 1: takes mtx1, sleeps one second, then takes mtx2.
 * Always returns NULL.
 */
void* t1_cb(void *arg) {
    (void)arg;  /* unused */

    pthread_mutex_lock(&mtx1);
    sleep(1);
    pthread_mutex_lock(&mtx2);
    // do work
    pthread_mutex_unlock(&mtx2);
    pthread_mutex_unlock(&mtx1);

    /* A thread start routine must return a value. */
    return NULL;
}

/*
 * Test thread 2: takes mtx2, sleeps one second, then takes mtx3.
 * Always returns NULL.
 */
void* t2_cb(void *arg) {
    (void)arg;  /* unused */

    pthread_mutex_lock(&mtx2);
    sleep(1);
    pthread_mutex_lock(&mtx3);
    // do work
    pthread_mutex_unlock(&mtx3);
    pthread_mutex_unlock(&mtx2);

    /* A thread start routine must return a value. */
    return NULL;
}

// Similar implementations for t3_cb and t4_cb...
```

代码片段

以下是钩子函数的实现，它们在锁操作前后插入了死锁检测逻辑：

void before_lock(pthread_t tid, pthread_mutex_t *mtx) {

	pthread_t otherid = search_rela_table(mtx);

	if (otherid != 0) {

		struct source_type from;
		from.id = tid;
		from.type = PROCESS;

		struct source_type to;
		to.id = otherid;
		to.type = PROCESS;
		
		add_edge(tid, otherid);
	}
	

}

void after_lock(pthread_t tid, pthread_mutex_t *mtx) {

	pthread_t otherid = search_rela_table(mtx);

	if (otherid != 0) {

		struct source_type from;
		from.id = tid;
		from.type = PROCESS;

		struct source_type to;
		to.id = otherid;
		to.type = PROCESS;

		if (verify_edge(from, to)) {
			remove_edge(from, to);
		}
	}
	add_rela_table(mtx, tid);

}

/*
 * Hook to call after thread `tid` has released `mtx`: deletes the (mtx, tid)
 * record from the ownership table. The result of the deletion is not
 * inspected.
 */
void after_unlock(pthread_t tid, pthread_mutex_t *mtx) {
	(void)del_rela_table(mtx, tid);
}


/*
 * Run the cycle search once from every vertex currently in the graph.
 */
void check_dead_lock(void) {
	int start = 0;

	while (start < tg->num) {
		search_for_cycle(start);
		start++;
	}
}

/*
 * Body of the checker thread: sleeps five seconds, runs a deadlock check,
 * and repeats forever. It never returns.
 */
static void *thread_routine(void *args) {

	for (;;) {
		sleep(5);
		check_dead_lock();
	}
}