ROCT event

一. linux kernel的eventfd机制

参考:《让事件飞——Linux eventfd 原理与实践》- 知乎:https://zhuanlan.zhihu.com/p/40572954

在Linux系统中,eventfd是一个用来通知事件的文件描述符,是一种内核向用户空间的应用发送通知的机制,可以有效地被用来实现用户空间的事件/通知驱动的应用程序。

二.ROCT的event机制

ROCT event的本质也是内核(KMD)向用户空间的应用发送通知的机制。

1.ROCT event类型

/*
 * Identifiers for the kinds of HSA events.
 *
 * HSA_EVENTTYPE_MAXID is one past the last valid type; callers treat any
 * value >= HSA_EVENTTYPE_MAXID as invalid.
 */
typedef enum _HSA_EVENTTYPE
{
    /* user-mode generated GPU signal */
    HSA_EVENTTYPE_SIGNAL = 0,
    /* HSA node change (attach/detach) */
    HSA_EVENTTYPE_NODECHANGE = 1,
    /* HSA device state change (start/stop) */
    HSA_EVENTTYPE_DEVICESTATECHANGE = 2,
    /* GPU shader exception event */
    HSA_EVENTTYPE_HW_EXCEPTION = 3,
    /* GPU SYSCALL with parameter info */
    HSA_EVENTTYPE_SYSTEM_EVENT = 4,
    /* GPU signal for debugging */
    HSA_EVENTTYPE_DEBUG_EVENT = 5,
    /* GPU signal for profiling */
    HSA_EVENTTYPE_PROFILE_EVENT = 6,
    /* GPU signal queue idle state (EOP pm4) */
    HSA_EVENTTYPE_QUEUE_EVENT = 7,
    /* GPU signal for signaling memory access faults and memory subsystem issues */
    HSA_EVENTTYPE_MEMORY = 8,
    /* ... */
    HSA_EVENTTYPE_MAXID,
    /* NOTE(review): presumably forces a 32-bit enum representation - confirm */
    HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
} HSA_EVENTTYPE;

2.ROCT event应用之——线程同步

(1) HSA_EVENTTYPE_SIGNAL

(2) HSA_EVENTTYPE_DEBUG_EVENT

这两种类型的event的共同点——用于线程间同步;

这两种类型的event的不同点——HSA_EVENTTYPE_SIGNAL可以通过调用hsaKmtSetEvent(HsaEvent *Event)或者硬件中断触发;HSA_EVENTTYPE_DEBUG_EVENT只能通过硬件中断触发。

3.问题1:为什么线程同步要用event??? 为什么不能用POSIX线程库API?

因为这里的线程是UMD线程,每个线程的运行都是为硬件服务的。所以,线程之间的同步必须和当前硬件的状态一致,也就是说这里线程之间的同步依赖于从内核(KMD)获取某些硬件信息。

4.问题2:对于HSA_EVENTTYPE_SIGNAL中的signal如何理解?

 

 三.ROCT event的源码分析

/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "libhsakmt.h"
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>
#include <stdio.h>
#include "linux/kfd_ioctl.h"
#include "fmm.h"

/* Base of the event slot array shared by all events created in this file.
 * NULL until hsaKmtCreateEvent() allocates or maps it, and again after
 * clear_events_page().
 */
static HSAuint64 *events_page = NULL;

/* Forget the cached event page pointer so that the next
 * hsaKmtCreateEvent() call sets the page up again. The memory behind the
 * old pointer is neither unmapped nor freed here.
 */
void clear_events_page(void)
{
	events_page = NULL;
}

/*
 * Return true for every event type except HSA_EVENTTYPE_SIGNAL and
 * HSA_EVENTTYPE_DEBUG_EVENT.
 */
static bool IsSystemEventType(HSA_EVENTTYPE type)
{
	switch (type) {
	case HSA_EVENTTYPE_SIGNAL:
	case HSA_EVENTTYPE_DEBUG_EVENT:
		/* Debug events behave as signal events. */
		return false;
	default:
		return true;
	}
}
/*
 * hsaKmtCreateEvent - create a KFD event and hand back a handle for it.
 *
 * @EventDesc:   event description (type, node id, sync variable); must not
 *               be NULL.
 * @ManualReset: forwarded to the kernel inverted, as args.auto_reset.
 *               Per the original author's notes: the KMD keeps a state flag
 *               per event. With ManualReset the flag is set through
 *               hsaKmtSetEvent(), tested by hsaKmtWaitOnEvent() and cleared
 *               through hsaKmtResetEvent(); without ManualReset the KMD
 *               clears the flag automatically.
 * @IsSignaled:  when true and the type is SIGNAL or DEBUG_EVENT, the event
 *               is set via AMDKFD_IOC_SET_EVENT right after creation, so a
 *               thread waiting on it is woken without an explicit
 *               hsaKmtSetEvent() call.
 * @Event:       receives the new handle on success; must not be NULL. It is
 *               set to NULL when creation fails after argument validation.
 *
 * Returns HSAKMT_STATUS_SUCCESS on success,
 * HSAKMT_STATUS_INVALID_PARAMETER for a NULL argument or an event type
 * >= HSA_EVENTTYPE_MAXID, and HSAKMT_STATUS_ERROR when an allocation, the
 * create ioctl or the event page mapping fails.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
					  bool ManualReset, bool IsSignaled,
					  HsaEvent **Event)
{
	unsigned int event_limit = KFD_SIGNAL_EVENT_LIMIT;

	CHECK_KFD_OPEN();

	/* Both pointers are dereferenced unconditionally below. */
	if (!EventDesc || !Event)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (EventDesc->EventType >= HSA_EVENTTYPE_MAXID)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	HsaEvent *e = calloc(1, sizeof(*e));

	if (!e)
		return HSAKMT_STATUS_ERROR;

	struct kfd_ioctl_create_event_args args = {0};

	args.event_type = EventDesc->EventType;
	args.node_id = EventDesc->NodeId;
	args.auto_reset = !ManualReset;

	/* events_page is shared by all callers; set it up under the lock. */
	pthread_mutex_lock(&hsakmt_mutex);

	/* dGPU code */
	if (is_dgpu && !events_page) {
		events_page = allocate_exec_aligned_memory_gpu(
			KFD_SIGNAL_EVENT_LIMIT * 8, PAGE_SIZE, 0, true, false, true);
		if (!events_page) {
			pthread_mutex_unlock(&hsakmt_mutex);
			/* The handle was already allocated; do not leak it. */
			free(e);
			*Event = NULL;
			return HSAKMT_STATUS_ERROR;
		}
		fmm_get_handle(events_page, (uint64_t *)&args.event_page_offset);
	}

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &args) != 0) {
		pthread_mutex_unlock(&hsakmt_mutex);
		free(e);
		*Event = NULL;
		return HSAKMT_STATUS_ERROR;
	}

	e->EventId = args.event_id;

	if (!events_page && args.event_page_offset > 0) {
		events_page = mmap(NULL, event_limit * 8, PROT_WRITE | PROT_READ,
				MAP_SHARED, kfd_fd, args.event_page_offset);
		if (events_page == MAP_FAILED) {
			/* old kernels only support 256 events */
			event_limit = 256;
			events_page = mmap(NULL, PAGE_SIZE, PROT_WRITE | PROT_READ,
					   MAP_SHARED, kfd_fd, args.event_page_offset);
		}
		if (events_page == MAP_FAILED) {
			events_page = NULL;
			pthread_mutex_unlock(&hsakmt_mutex);
			/* hsaKmtDestroyEvent() frees the handle only when it
			 * succeeds; release it ourselves otherwise.
			 */
			if (hsaKmtDestroyEvent(e) != HSAKMT_STATUS_SUCCESS)
				free(e);
			*Event = NULL;
			return HSAKMT_STATUS_ERROR;
		}
	}

	/* Take the slot address while still holding the lock so that
	 * events_page cannot change between the check and the use.
	 * NOTE(review): event_limit is per call; if an earlier call fell back
	 * to the 256-slot mapping, later calls still compare against
	 * KFD_SIGNAL_EVENT_LIMIT - confirm the kernel keeps slot indices in
	 * range in that case.
	 */
	if (args.event_page_offset > 0 && args.event_slot_index < event_limit)
		e->EventData.HWData2 = (HSAuint64)&events_page[args.event_slot_index];

	pthread_mutex_unlock(&hsakmt_mutex);

	e->EventData.EventType = EventDesc->EventType;
	e->EventData.HWData1 = args.event_id;

	e->EventData.HWData3 = args.event_trigger_data;
	e->EventData.EventData.SyncVar.SyncVar.UserData =
		EventDesc->SyncVar.SyncVar.UserData;
	e->EventData.EventData.SyncVar.SyncVarSize =
		EventDesc->SyncVar.SyncVarSize;

	/* Signal the event right away so that waiters are woken up. The
	 * result is deliberately ignored: creation itself has succeeded.
	 */
	if (IsSignaled && !IsSystemEventType(e->EventData.EventType)) {
		struct kfd_ioctl_set_event_args set_args = {0};

		set_args.event_id = args.event_id;

		kmtIoctl(kfd_fd, AMDKFD_IOC_SET_EVENT, &set_args);
	}

	*Event = e;

	return HSAKMT_STATUS_SUCCESS;
}

/*
 * hsaKmtDestroyEvent - destroy an event in the kernel and free its handle.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL handle and
 * HSAKMT_STATUS_ERROR when the destroy ioctl fails; in both cases the
 * handle is not freed. On success the handle is freed and must not be
 * used again.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEvent(HsaEvent *Event)
{
	CHECK_KFD_OPEN();

	if (Event == NULL)
		return HSAKMT_STATUS_INVALID_HANDLE;

	struct kfd_ioctl_destroy_event_args destroy_args = {
		.event_id = Event->EventId,
	};

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_EVENT, &destroy_args))
		return HSAKMT_STATUS_ERROR;

	free(Event);
	return HSAKMT_STATUS_SUCCESS;
}

/*
 * hsaKmtSetEvent - signal an event through AMDKFD_IOC_SET_EVENT.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL handle,
 * HSAKMT_STATUS_ERROR for system event types or when the ioctl returns -1,
 * and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetEvent(HsaEvent *Event)
{
	CHECK_KFD_OPEN();

	if (Event == NULL)
		return HSAKMT_STATUS_INVALID_HANDLE;

	/* The spec does not say so explicitly, but system-defined events
	 * must not be signaled from user space.
	 */
	if (IsSystemEventType(Event->EventData.EventType))
		return HSAKMT_STATUS_ERROR;

	struct kfd_ioctl_set_event_args set_args = {
		.event_id = Event->EventId,
	};
	const int rc = kmtIoctl(kfd_fd, AMDKFD_IOC_SET_EVENT, &set_args);

	return (rc == -1) ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

/*
 * hsaKmtResetEvent - reset an event through AMDKFD_IOC_RESET_EVENT.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL handle,
 * HSAKMT_STATUS_ERROR for system event types or when the ioctl returns -1,
 * and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtResetEvent(HsaEvent *Event)
{
	CHECK_KFD_OPEN();

	if (Event == NULL)
		return HSAKMT_STATUS_INVALID_HANDLE;

	/* The spec does not say so explicitly, but system-defined events
	 * must not be reset from user space.
	 */
	if (IsSystemEventType(Event->EventData.EventType))
		return HSAKMT_STATUS_ERROR;

	struct kfd_ioctl_reset_event_args reset_args = {
		.event_id = Event->EventId,
	};
	const int rc = kmtIoctl(kfd_fd, AMDKFD_IOC_RESET_EVENT, &reset_args);

	return (rc == -1) ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

/*
 * hsaKmtQueryEventState - validate an event handle.
 *
 * Only the handle is checked; no kernel query is issued. Returns
 * HSAKMT_STATUS_INVALID_HANDLE for a NULL handle and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventState(HsaEvent *Event)
{
	CHECK_KFD_OPEN();

	return Event ? HSAKMT_STATUS_SUCCESS : HSAKMT_STATUS_INVALID_HANDLE;
}

/*
 * hsaKmtWaitOnEvent - wait on a single event.
 *
 * Wraps hsaKmtWaitOnMultipleEvents() with a one-element list and
 * WaitOnAll set to true. A NULL handle yields
 * HSAKMT_STATUS_INVALID_HANDLE without calling the wrapped function.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent(HsaEvent *Event,
		HSAuint32 Milliseconds)
{
	if (Event == NULL)
		return HSAKMT_STATUS_INVALID_HANDLE;

	HsaEvent *single_event[1] = { Event };

	return hsaKmtWaitOnMultipleEvents(single_event, 1, true, Milliseconds);
}

/*
 * Decode a memory exception record and log, through pr_err(), the faulting
 * address, the failure reason and what fmm_get_mem_info() reports about the
 * buffer containing that address.
 */
static void analysis_memory_exception(struct kfd_hsa_memory_exception_data *
						memory_exception_data)
{
	const uint64_t fault_va = memory_exception_data->va;
	uint32_t fault_node = 0;
	HsaPointerInfo buf_info;
	bool show_cpu_address = false;

	/* The lookup result is not checked; fault_node stays 0 unless the
	 * helper stores a node id.
	 */
	gpuid_to_nodeid(memory_exception_data->gpu_id, &fault_node);
	pr_err("Memory exception on virtual address 0x%lx, ", fault_va);
	pr_err("node id %d : ", fault_node);

	/* Only the first failure flag that is set gets reported. */
	if (memory_exception_data->failure.NotPresent)
		pr_err("Page not present\n");
	else if (memory_exception_data->failure.ReadOnly)
		pr_err("Writing to readonly page\n");
	else if (memory_exception_data->failure.NoExecute)
		pr_err("Execute to none-executable page\n");

	if (fmm_get_mem_info((const void *)fault_va, &buf_info) !=
	    HSAKMT_STATUS_SUCCESS) {
		pr_err("Address does not belong to a known buffer\n");
		return;
	}

	pr_err("GPU address 0x%lx, node id %d, size in byte 0x%lx\n",
			buf_info.GPUAddress, buf_info.Node, buf_info.SizeInBytes);

	switch (buf_info.Type) {
	case HSA_POINTER_REGISTERED_SHARED:
		pr_err("Memory is registered shared buffer (IPC)\n");
		break;
	case HSA_POINTER_REGISTERED_GRAPHICS:
		pr_err("Memory is registered graphics buffer\n");
		break;
	case HSA_POINTER_REGISTERED_USER:
		pr_err("Memory is registered user pointer\n");
		show_cpu_address = true;
		break;
	case HSA_POINTER_ALLOCATED:
		pr_err("Memory is allocated using hsaKmtAllocMemory\n");
		show_cpu_address = true;
		break;
	default:
		pr_err("Invalid memory type %d\n", buf_info.Type);
		break;
	}

	/* User pointers and hsaKmtAllocMemory buffers also log a CPU address. */
	if (show_cpu_address)
		pr_err("CPU address of the memory is %p\n", buf_info.CPUAddress);

	if (buf_info.RegisteredNodes) {
		pr_err("Memory is registered to node id: ");
		for (unsigned int n = 0; n < buf_info.NRegisteredNodes; n++)
			pr_err("%d ", buf_info.RegisteredNodes[n]);
		pr_err("\n");
	}

	if (buf_info.MappedNodes) {
		pr_err("Memory is mapped to node id: ");
		for (unsigned int n = 0; n < buf_info.NMappedNodes; n++)
			pr_err("%d ", buf_info.MappedNodes[n]);
		pr_err("\n");
	}
}

/*
 * hsaKmtWaitOnMultipleEvents - wait on a list of events.
 *
 * @Events:       array of NumEvents event handles; neither the array nor
 *                any of its entries may be NULL.
 * @NumEvents:    number of entries in Events.
 * @WaitOnAll:    true to wake up only once all events have arrived, false
 *                to wake up as soon as one of them has.
 * @Milliseconds: timeout forwarded to the kernel; once it expires without
 *                the events arriving the caller stops blocking.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE when Events or one of its entries is
 * NULL, HSAKMT_STATUS_ERROR when the allocation or the wait ioctl fails,
 * HSAKMT_STATUS_WAIT_TIMEOUT when the kernel reports a timeout, and
 * HSAKMT_STATUS_SUCCESS otherwise. For HSA_EVENTTYPE_MEMORY events with a
 * non-zero gpu_id the fault details are copied into the event, and a
 * failing gpuid_to_nodeid() status is returned as is.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents(HsaEvent *Events[],
						   HSAuint32 NumEvents,
						   bool WaitOnAll,
						   HSAuint32 Milliseconds)
{
	CHECK_KFD_OPEN();

	if (!Events)
		return HSAKMT_STATUS_INVALID_HANDLE;

	/* Every entry is dereferenced below; reject NULL handles before
	 * anything is allocated.
	 */
	for (HSAuint32 i = 0; i < NumEvents; i++) {
		if (!Events[i])
			return HSAKMT_STATUS_INVALID_HANDLE;
	}

	struct kfd_event_data *event_data = calloc(NumEvents, sizeof(*event_data));

	/* calloc(0, ...) may legitimately return NULL; only a failed
	 * non-empty allocation is an error.
	 */
	if (!event_data && NumEvents > 0)
		return HSAKMT_STATUS_ERROR;

	for (HSAuint32 i = 0; i < NumEvents; i++) {
		event_data[i].event_id = Events[i]->EventId;
		event_data[i].kfd_event_data_ext = (uint64_t)(uintptr_t)NULL;
	}

	struct kfd_ioctl_wait_events_args args = {0};

	args.wait_for_all = WaitOnAll;
	args.timeout = Milliseconds;
	args.num_events = NumEvents;
	/* List of events the kernel should watch. */
	args.events_ptr = (uint64_t)(uintptr_t)event_data;

	HSAKMT_STATUS result;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &args) == -1)
		result = HSAKMT_STATUS_ERROR;
	else if (args.wait_result == KFD_IOC_WAIT_RESULT_TIMEOUT)
		result = HSAKMT_STATUS_WAIT_TIMEOUT;
	else {
		/* Copy memory fault details into HSA_EVENTTYPE_MEMORY events. */
		result = HSAKMT_STATUS_SUCCESS;
		for (HSAuint32 i = 0; i < NumEvents; i++) {
			HsaEvent *ev = Events[i];
			struct kfd_hsa_memory_exception_data *mem =
				&event_data[i].memory_exception_data;

			if (ev->EventData.EventType != HSA_EVENTTYPE_MEMORY ||
			    !mem->gpu_id)
				continue;

			ev->EventData.EventData.MemoryAccessFault.VirtualAddress = mem->va;
			result = gpuid_to_nodeid(mem->gpu_id,
					&ev->EventData.EventData.MemoryAccessFault.NodeId);
			if (result != HSAKMT_STATUS_SUCCESS)
				goto out;
			ev->EventData.EventData.MemoryAccessFault.Failure.NotPresent = mem->failure.NotPresent;
			ev->EventData.EventData.MemoryAccessFault.Failure.ReadOnly = mem->failure.ReadOnly;
			ev->EventData.EventData.MemoryAccessFault.Failure.NoExecute = mem->failure.NoExecute;
			ev->EventData.EventData.MemoryAccessFault.Failure.Imprecise = mem->failure.imprecise;
			ev->EventData.EventData.MemoryAccessFault.Failure.ErrorType = mem->ErrorType;
			/* Error types 1 and 2 are reported as ECC failures. */
			ev->EventData.EventData.MemoryAccessFault.Failure.ECC =
					((mem->ErrorType == 1) || (mem->ErrorType == 2)) ? 1 : 0;
			ev->EventData.EventData.MemoryAccessFault.Flags = HSA_EVENTID_MEMORY_FATAL_PROCESS;
			analysis_memory_exception(mem);
		}
	}
out:
	free(event_data);

	return result;
}

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

denglin12315

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值