前言
本文主要是debug AMDGPU SDMA的doorbell offset。
一、bpftrace+test case
#!/usr/bin/env bpftrace
/*
 * Traces the KFD queue-creation path with kprobes (k:) and kretprobes (kr:)
 * on three kernel functions and prints the doorbell-related values seen at
 * each step:
 *   kfd_ioctl_create_queue -> pqm_create_queue -> kfd_get_doorbell_dw_offset_in_bar
 *
 * Values captured at function entry are stashed in global maps so that the
 * matching return probe can read them. The maps are not keyed by thread id,
 * so concurrent callers would overwrite each other's entries; they are also
 * never deleted, which is why bpftrace dumps them when the script exits.
 *
 * In the return probes `func` does not resolve to the probed function; the
 * captured output shows "kretprobe_trampoline" there instead.
 */
// The uapi header supplies struct kfd_ioctl_create_queue_args used in the cast below.
#include "/usr/src/linux-hwe-5.13-headers-5.13.0-35/include/uapi/linux/kfd_ioctl.h"
// Entry: print the third argument (arg2), labelled as the doorbell id.
k:kfd_get_doorbell_dw_offset_in_bar {
printf("%s:\n", func);
printf(" doorbell id: 0x%x\n", arg2);
}
// Return: print the return value, labelled doorbell_off.
kr:kfd_get_doorbell_dw_offset_in_bar {
printf("%s:\n", func);
printf(" doorbell_off: 0x%x\n", retval);
}
// Entry: remember the 5th and 6th arguments; the return probe dereferences
// both, i.e. they are treated as pointers to values the callee fills in.
// NOTE(review): in the captured output doorbell_offset_in_process is always
// 0x0 and the map holds 0 at exit, so arg5 is probably not the expected
// pointer on the traced driver build -- confirm against the prototype of
// pqm_create_queue in the running driver.
k:pqm_create_queue {
printf("%s:\n", func);
@queue_id = arg4;
@doorbell_offset_in_process = arg5;
}
// Return: dereference the saved pointers and print what the callee wrote.
kr:pqm_create_queue {
printf("%s:\n", func);
printf(" queue_id: 0x%x\n", *@queue_id);
printf(" doorbell_offset_in_process: 0x%x\n", *@doorbell_offset_in_process);
}
// Entry: remember the third argument (arg2) as a pointer to the ioctl argument struct.
k:kfd_ioctl_create_queue {
printf("%s:\n", func);
@args = (struct kfd_ioctl_create_queue_args *)arg2;
}
// Return: print the 64-bit doorbell_offset field of the saved argument struct.
kr:kfd_ioctl_create_queue {
printf("%s:\n", func);
printf(" doorbell offset 0x%llx \n", @args->doorbell_offset);
}
// Creates one SDMA queue per (engine, queue-per-engine) slot on the default
// GPU node, then drives every queue with a copy packet followed by a
// write-data packet, checks both results, and finally destroys all queues.
TEST_F(KFDQMTest, AllSdmaQueues)
{
    TEST_START(TESTPROFILE_RUNALL)

    const int gpuNode = m_NodeInfo.HsaDefaultGPUNode();
    const int pageBytes = PAGE_SIZE;
    ASSERT_GE(gpuNode, 0) << "failed to get default GPU Node";

    const unsigned int queueCount = m_numSdmaEngines * m_numSdmaQueuesPerEngine;
    LOG() << "Regular SDMA engines number: " << m_numSdmaEngines
          << " SDMA queues per engine: " << m_numSdmaQueuesPerEngine << std::endl;

    // The destination is twice the source size: the copy lands at its start,
    // the completion marker is written pageBytes / 4 unsigned ints further on.
    HsaMemoryBuffer destBuf(2 * pageBytes, gpuNode, false);
    HsaMemoryBuffer srcBuf(pageBytes, gpuNode, false);
    destBuf.Fill(0xFF);

    std::vector<SDMAQueue> queues(queueCount);

    // Phase 1: create every queue up front so that all of them exist at once.
    for (unsigned int i = 0; i != queueCount; ++i) {
        printf("qid: 0x%x \n", i);
        ASSERT_SUCCESS(queues[i].Create(gpuNode));
    }

    // Phase 2: exercise each queue with a per-queue fill pattern.
    for (unsigned int i = 0; i != queueCount; ++i) {
        SDMAQueue &queue = queues[i];

        destBuf.Fill(0x0);
        srcBuf.Fill(i + 0xa0);

        unsigned int *const dst = destBuf.As<unsigned int *>();
        unsigned int *const src = srcBuf.As<unsigned int *>();
        unsigned int *const marker = dst + pageBytes / 4;

        queue.PlaceAndSubmitPacket(
            SDMACopyDataPacket(queue.GetFamilyId(), dst, src, pageBytes));
        queue.PlaceAndSubmitPacket(
            SDMAWriteDataPacket(queue.GetFamilyId(), marker, 0x02020202));
        queue.Wait4PacketConsumption();

        // The marker is written by the second packet; once it is visible the
        // copied data is compared against the source.
        EXPECT_TRUE(WaitOnValue(marker, 0x02020202));
        EXPECT_SUCCESS(memcmp(dst, src, pageBytes));
    }

    // Phase 3: tear the queues down in creation order.
    for (SDMAQueue &queue : queues)
        EXPECT_SUCCESS(queue.Destroy());

    TEST_END
}
二、结果
gl@pc:~/code/bpftrace$ sudo ./amdgpu.bt
Attaching 6 probes...
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x100
kretprobe_trampoline:
doorbell_off: 0x1200
kretprobe_trampoline:
queue_id: 0x0
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000800
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x10a
kretprobe_trampoline:
doorbell_off: 0x1214
kretprobe_trampoline:
queue_id: 0x1
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000850
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x300
kretprobe_trampoline:
doorbell_off: 0x1600
kretprobe_trampoline:
queue_id: 0x2
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000001800
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x30a
kretprobe_trampoline:
doorbell_off: 0x1614
kretprobe_trampoline:
queue_id: 0x3
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000001850
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x101
kretprobe_trampoline:
doorbell_off: 0x1202
kretprobe_trampoline:
queue_id: 0x4
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000808
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x10b
kretprobe_trampoline:
doorbell_off: 0x1216
kretprobe_trampoline:
queue_id: 0x5
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000858
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x301
kretprobe_trampoline:
doorbell_off: 0x1602
kretprobe_trampoline:
queue_id: 0x6
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000001808
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x30b
kretprobe_trampoline:
doorbell_off: 0x1616
kretprobe_trampoline:
queue_id: 0x7
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000001858
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x102
kretprobe_trampoline:
doorbell_off: 0x1204
kretprobe_trampoline:
queue_id: 0x8
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000810
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x10c
kretprobe_trampoline:
doorbell_off: 0x1218
kretprobe_trampoline:
queue_id: 0x9
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000860
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x302
kretprobe_trampoline:
doorbell_off: 0x1604
kretprobe_trampoline:
queue_id: 0xa
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000001810
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x30c
kretprobe_trampoline:
doorbell_off: 0x1618
kretprobe_trampoline:
queue_id: 0xb
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000001860
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x103
kretprobe_trampoline:
doorbell_off: 0x1206
kretprobe_trampoline:
queue_id: 0xc
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000818
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x10d
kretprobe_trampoline:
doorbell_off: 0x121a
kretprobe_trampoline:
queue_id: 0xd
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000868
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x303
kretprobe_trampoline:
doorbell_off: 0x1606
kretprobe_trampoline:
queue_id: 0xe
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000001818
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x30d
kretprobe_trampoline:
doorbell_off: 0x161a
kretprobe_trampoline:
queue_id: 0xf
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000001868
^C
@args: -80692590903848
@doorbell_offset_in_process: 0
@queue_id: 18446663381118647496
gl@pc:~/code/bpftrace$
结果分析
以0xd134c00000001868为例:bit 62 ~ 63是MMAP type(这里是0x3,即KFD_MMAP_TYPE_DOORBELL),bit 46 ~ 61是gpu_id(0x44d3),bit 0 ~ 45是mmap offset(0x1868);
/* Use upper bits of mmap offset to store KFD driver specific information.
 * BITS[63:62] - Encode MMAP type
 * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to
 * BITS[45:0]  - MMAP offset value
 *
 * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
 *  defines are w.r.t. PAGE_SIZE
 *
 * KFD_GPU_ID_HASH_WIDTH is not defined here; it must be in scope wherever the
 * GPU_ID macros are expanded.
 */
#define KFD_MMAP_TYPE_SHIFT	62
#define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_EVENTS	(0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_RESERVED_MEM	(0x1ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_MMIO	(0x0ULL << KFD_MMAP_TYPE_SHIFT)

#define KFD_MMAP_GPU_ID_SHIFT 46
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
				<< KFD_MMAP_GPU_ID_SHIFT)
/* Shift gpu_id into the gpu_id field; bits that do not fit the field are
 * masked off. The argument is parenthesized so that any expression may be
 * passed.
 */
#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)(gpu_id)) << KFD_MMAP_GPU_ID_SHIFT)\
				& KFD_MMAP_GPU_ID_MASK)
/* Extract the gpu_id field from an mmap offset. The argument is
 * parenthesized: without it, an argument such as (a | b) would be combined
 * with the mask using the wrong precedence.
 */
#define KFD_MMAP_GET_GPU_ID(offset)    (((offset) & KFD_MMAP_GPU_ID_MASK) \
				>> KFD_MMAP_GPU_ID_SHIFT)
低位部分的0x1868的计算方式如下:
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
if (KFD_IS_SOC15(dev->device_info->asic_family))
/* On SOC15 ASICs, include the doorbell offset within the
* process doorbell frame, which is 2 pages.
*/
args->doorbell_offset |= doorbell_offset_in_process;
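其中doorbell_offset_in_process的值本应该是0x5868(doorbell_off 0x161a是以dword为单位的,0x161a × sizeof(uint32_t) = 0x5868),但是下面这段代码写明了每个process最多1024个doorbell,每个doorbell 8 byte,即每个process的doorbell空间为0x2000字节,所以0x5868 & (0x2000 - 1)就被截断成了0x1868: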
if (q && p_doorbell_offset_in_process)
/* Return the doorbell offset within the doorbell page
* to the caller so it can be passed up to user mode
* (in bytes).
* There are always 1024 doorbells per process, so in case
* of 8-byte doorbells, there are two doorbell pages per
* process.
*/
*p_doorbell_offset_in_process =
(q->properties.doorbell_off * sizeof(uint32_t)) &
(kfd_doorbell_process_slice(dev) - 1);
那么紧跟着新问题就来了,cp queue会不会跟sdma queue的doorbell 空间冲突?
我们来看一把cp queue的doorbell空间
gl@pc:~/code/bpftrace$ sudo ./amdgpu.bt
[sudo] password for gl:
Attaching 6 probes...
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x0
kretprobe_trampoline:
doorbell_off: 0x1000
kretprobe_trampoline:
queue_id: 0x0
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000000
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x1
kretprobe_trampoline:
doorbell_off: 0x1002
kretprobe_trampoline:
queue_id: 0x1
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000008
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x2
kretprobe_trampoline:
doorbell_off: 0x1004
kretprobe_trampoline:
queue_id: 0x2
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000010
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x3
kretprobe_trampoline:
doorbell_off: 0x1006
kretprobe_trampoline:
queue_id: 0x3
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000018
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x4
kretprobe_trampoline:
doorbell_off: 0x1008
kretprobe_trampoline:
queue_id: 0x4
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000020
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x5
kretprobe_trampoline:
doorbell_off: 0x100a
kretprobe_trampoline:
queue_id: 0x5
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000028
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x6
kretprobe_trampoline:
doorbell_off: 0x100c
kretprobe_trampoline:
queue_id: 0x6
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000030
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x7
kretprobe_trampoline:
doorbell_off: 0x100e
kretprobe_trampoline:
queue_id: 0x7
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000038
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x8
kretprobe_trampoline:
doorbell_off: 0x1010
kretprobe_trampoline:
queue_id: 0x8
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000040
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x9
kretprobe_trampoline:
doorbell_off: 0x1012
kretprobe_trampoline:
queue_id: 0x9
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000048
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0xa
kretprobe_trampoline:
doorbell_off: 0x1014
kretprobe_trampoline:
queue_id: 0xa
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000050
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0xb
kretprobe_trampoline:
doorbell_off: 0x1016
kretprobe_trampoline:
queue_id: 0xb
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000058
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0xc
kretprobe_trampoline:
doorbell_off: 0x1018
kretprobe_trampoline:
queue_id: 0xc
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000060
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0xd
kretprobe_trampoline:
doorbell_off: 0x101a
kretprobe_trampoline:
queue_id: 0xd
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000068
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0xe
kretprobe_trampoline:
doorbell_off: 0x101c
kretprobe_trampoline:
queue_id: 0xe
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000070
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0xf
kretprobe_trampoline:
doorbell_off: 0x101e
kretprobe_trampoline:
queue_id: 0xf
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000078
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x10
kretprobe_trampoline:
doorbell_off: 0x1020
kretprobe_trampoline:
queue_id: 0x10
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000080
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x11
kretprobe_trampoline:
doorbell_off: 0x1022
kretprobe_trampoline:
queue_id: 0x11
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000088
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x12
kretprobe_trampoline:
doorbell_off: 0x1024
kretprobe_trampoline:
queue_id: 0x12
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000090
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x13
kretprobe_trampoline:
doorbell_off: 0x1026
kretprobe_trampoline:
queue_id: 0x13
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c00000000098
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x14
kretprobe_trampoline:
doorbell_off: 0x1028
kretprobe_trampoline:
queue_id: 0x14
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c000000000a0
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x15
kretprobe_trampoline:
doorbell_off: 0x102a
kretprobe_trampoline:
queue_id: 0x15
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c000000000a8
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x16
kretprobe_trampoline:
doorbell_off: 0x102c
kretprobe_trampoline:
queue_id: 0x16
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c000000000b0
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
doorbell id: 0x17
kretprobe_trampoline:
doorbell_off: 0x102e
kretprobe_trampoline:
queue_id: 0x17
doorbell_offset_in_process: 0x0
kretprobe_trampoline:
doorbell offset 0xd134c000000000b8
可以看到cp queue的doorbell offset是按8字节逐个递增的,这就决定了它最多增加到0x800(sdma queue的doorbell从0x800开始),0x800÷8=0x100=256,从而决定cp queue最多只能创建256个(doorbell id 0x0 ~ 0xff),所以目前HIP层限定了cp queue最大个数为128。
再补充一段用户态hsaKmtCreateQueue中处理doorbell offset的代码(其中的leiDebug打印和注释里的输出是调试时加的):
/*
 * Excerpt of hsaKmtCreateQueue (the beginning of the body is elided by the
 * article author, so the locals used below are declared out of view).
 *
 * Visible flow: fill the create-queue ioctl arguments, issue
 * AMDKFD_IOC_CREATE_QUEUE, split the returned args.doorbell_offset into an
 * mmap offset and an offset inside the doorbell mapping, map the doorbells,
 * and hand the resulting doorbell address back in QueueResource.
 *
 * Returns HSAKMT_STATUS_ERROR if the ioctl or map_doorbell fails,
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId,
HSA_QUEUE_TYPE Type,
HSAuint32 QueuePercentage,
HSA_QUEUE_PRIORITY Priority,
void *QueueAddress,
HSAuint64 QueueSizeInBytes,
HsaEvent *Event,
HsaQueueResource *QueueResource)
{
// .......
args.read_pointer_address = QueueResource->QueueRptrValue;
args.write_pointer_address = QueueResource->QueueWptrValue;
args.ring_base_address = (uintptr_t)QueueAddress;
args.ring_size = QueueSizeInBytes;
args.queue_percentage = QueuePercentage;
/* The priority is looked up in priority_map with an index bias of 3. */
args.queue_priority = priority_map[Priority+3];
err = kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args);
if (err == -1) {
free_queue(q);
return HSAKMT_STATUS_ERROR;
}
q->queue_id = args.queue_id;
/* Debug print added for the article.
 * NOTE(review): the declarations of the printed variables are elided, so
 * the format specifiers here and below cannot be checked from this excerpt.
 */
printf("leiDebug: args.doorbell_offset: 0x%llx \n", args.doorbell_offset);
if (IS_SOC15(q->gfxv)) {
/* On SOC15 chips, the doorbell offset within the
 * doorbell page is included in the doorbell offset
 * returned by KFD. This allows CP queue doorbells to be
 * allocated dynamically (while SDMA queue doorbells fixed)
 * rather than based on the its process queue ID.
 */
/* (size - 1) is used as a bit mask: the bits above it form the mmap
 * offset, the bits inside it the offset within the mapping. This
 * presumably relies on doorbells[NodeId].size being a power of two --
 * TODO confirm.
 */
doorbell_mmap_offset = args.doorbell_offset &
~(HSAuint64)(doorbells[NodeId].size - 1);
doorbell_offset = args.doorbell_offset &
(doorbells[NodeId].size - 1);
} else {
/* On older chips, the doorbell offset within the
 * doorbell page is based on the queue ID.
 */
doorbell_mmap_offset = args.doorbell_offset;
doorbell_offset = q->queue_id * DOORBELL_SIZE(q->gfxv);
}
printf("leiDebug: doorbell_mmap_offset: 0x%lx \n", doorbell_mmap_offset);
printf("leiDebug: doorbell_offset: 0x%x \n", doorbell_offset);
/*
leiDebug: doorbell_mmap_offset: 0xd134c00000000000
leiDebug: doorbell_offset: 0x800
*/
// Map the doorbell mmap offset (0xd134c00000000000 in the run above) to obtain a
// virtual address, which ends up in doorbells[NodeId].mapping.
err = map_doorbell(NodeId, gpu_id, doorbell_mmap_offset);
if (err != HSAKMT_STATUS_SUCCESS) {
hsaKmtDestroyQueue(q->queue_id);
free_queue(q);
return HSAKMT_STATUS_ERROR;
}
QueueResource->QueueId = PORT_VPTR_TO_UINT64(q);
/* Doorbell address = base of the doorbell mapping + offset within it. */
QueueResource->Queue_DoorBell = VOID_PTR_ADD(doorbells[NodeId].mapping,
doorbell_offset);
/*
(gdb) p/x QueueResource->Queue_DoorBell
$21 = 0x7ffff7fba800
(gdb) p/x doorbells[NodeId].mapping
$22 = 0x7ffff7fba000
(gdb)
*/
return HSAKMT_STATUS_SUCCESS;
}
总结
2MB的doorbell空间,最多256个process,每个process最多可以分配1024个doorbell,每个doorbell占用8Byte。
前两个page是给KMD用的,所以第一个进程是从0x2200004000开始的。
[257021.463294] [2425430] amdgpu: Mapping doorbell page
target user address == 0x7F31DE3B6000
physical address == 0x2200004000
vm_flags == 0x42644BB
size == 0x2000
第二个进程
[688365.466517] [953913] amdgpu: Mapping doorbell page
target user address == 0x7FFFF7FBE000
physical address == 0x2200006000
vm_flags == 0x42644BB
size == 0x2000
每个process分配8192 Byte(0x2000)空间,即1024(0x400)个doorbell × 8 Byte。
doorbell id 0x0 ~ 0xff 是cp的空间;下面日志里的0x100 ~ 0x18f以及它的镜像0x300 ~ 0x38f是预留给非CP引擎(SDMA、IH、VCN等)的:
[607422.170677] [1483203] amdgpu: reserved doorbell 0x100 - 0x18f
[607422.170680] [1483203] amdgpu: reserved doorbell 0x300 - 0x38f
typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
{
/* Compute + GFX: 0~255 */
AMDGPU_VEGA20_DOORBELL_KIQ = 0x000,
AMDGPU_VEGA20_DOORBELL_HIQ = 0x001,
AMDGPU_VEGA20_DOORBELL_DIQ = 0x002,
AMDGPU_VEGA20_DOORBELL_MEC_RING0 = 0x003,
AMDGPU_VEGA20_DOORBELL_MEC_RING1 = 0x004,
AMDGPU_VEGA20_DOORBELL_MEC_RING2 = 0x005,
AMDGPU_VEGA20_DOORBELL_MEC_RING3 = 0x006,
AMDGPU_VEGA20_DOORBELL_MEC_RING4 = 0x007,
AMDGPU_VEGA20_DOORBELL_MEC_RING5 = 0x008,
AMDGPU_VEGA20_DOORBELL_MEC_RING6 = 0x009,
AMDGPU_VEGA20_DOORBELL_MEC_RING7 = 0x00A,
AMDGPU_VEGA20_DOORBELL_USERQUEUE_START = 0x00B,
AMDGPU_VEGA20_DOORBELL_USERQUEUE_END = 0x08A,
AMDGPU_VEGA20_DOORBELL_GFX_RING0 = 0x08B,
/* SDMA:256~335*/
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0 = 0x100,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1 = 0x10A,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2 = 0x114,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3 = 0x11E,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4 = 0x128,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5 = 0x132,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6 = 0x13C,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7 = 0x146,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE8 = 0x150,
/* IH: 376~391 */
AMDGPU_VEGA20_DOORBELL_IH = 0x178,
/* MMSCH: 392~407
* overlap the doorbell assignment with VCN as they are mutually exclusive
* VCN engine's doorbell is 32 bit and two VCN ring share one QWORD
*/
AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* VNC0 */
AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189,
AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A,
AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B,
AMDGPU_VEGA20_DOORBELL64_VCN8_9 = 0x18C, /* VNC1 */
AMDGPU_VEGA20_DOORBELL64_VCNa_b = 0x18D,
AMDGPU_VEGA20_DOORBELL64_VCNc_d = 0x18E,
AMDGPU_VEGA20_DOORBELL64_VCNe_f = 0x18F,
AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188,
AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189,
AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A,
AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7 = 0x18B,
AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1 = 0x18C,
AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D,
AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E,
AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F,
AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0,
AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F,
AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
bitmap init的时候会把这些预留(reserved)的doorbell在bitmap中置位,这样之后分配doorbell时就会跳过它们:
/*
 * init_doorbell_bitmap - allocate the per-process-device doorbell bitmap and
 * pre-mark the non-CP doorbells as in use.
 *
 * @qpd: receives the new bitmap in qpd->doorbell_bitmap
 * @dev: supplies the non-CP doorbell range via
 *       shared_resources.non_cp_doorbells_start / _end
 *
 * On devices for which KFD_IS_SOC15() is false nothing is allocated and 0 is
 * returned. Otherwise a zeroed bitmap of KFD_MAX_NUM_OF_QUEUES_PER_PROCESS
 * bits is allocated and, for every index in [range_start, range_end] that
 * lies in the first half of the bitmap, both the index and the index plus
 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET are set.
 *
 * Return: 0 on success, -ENOMEM if the bitmap cannot be allocated.
 */
static int init_doorbell_bitmap(struct qcm_process_device *qpd,
				struct kfd_dev *dev)
{
	unsigned int i;
	int range_start = dev->shared_resources.non_cp_doorbells_start;
	int range_end = dev->shared_resources.non_cp_doorbells_end;

	if (!KFD_IS_SOC15(dev))
		return 0;

	/*
	 * The former "#if defined(HAVE_BITMAP_FUNCS) / #else" pair contained
	 * the same bitmap_zalloc() call in both branches, so a single
	 * unconditional call is equivalent under either configuration.
	 */
	qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
					     GFP_KERNEL);
	if (!qpd->doorbell_bitmap)
		return -ENOMEM;

	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
		 range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
		 range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);

	/* Only the first half is scanned; the mirror bit covers the second. */
	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
		if (i >= range_start && i <= range_end) {
			__set_bit(i, qpd->doorbell_bitmap);
			__set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
				  qpd->doorbell_bitmap);
		}
	}

	return 0;
}