AMDGPU doorbell offset分配


前言

本文主要是debug AMDGPU SDMA的doorbell offset。


一、bpftrace+test case

#!/usr/bin/env bpftrace
// Traces KFD queue creation and prints the doorbell values seen at three
// levels: kfd_get_doorbell_dw_offset_in_bar, pqm_create_queue and
// kfd_ioctl_create_queue.
//
// The UAPI header is included so that struct kfd_ioctl_create_queue_args can
// be used in the cast below. The path is specific to one installed kernel
// header package and must match the running kernel.
#include "/usr/src/linux-hwe-5.13-headers-5.13.0-35/include/uapi/linux/kfd_ioctl.h"
// Entry probe: arg2 (third argument, zero-based) is printed as the doorbell id.
k:kfd_get_doorbell_dw_offset_in_bar {
        printf("%s:\n", func);
        printf("    doorbell id: 0x%x\n", arg2);
}
// Return probe: the return value is printed as the doorbell offset.
kr:kfd_get_doorbell_dw_offset_in_bar {
        printf("%s:\n", func);
        printf("    doorbell_off: 0x%x\n", retval);
}

// Entry probe: stash arg4 and arg5 so the return probe can dereference them.
// NOTE(review): presumably these are the queue-id and doorbell-offset
// out-pointers; confirm against the pqm_create_queue() signature of the
// kernel/DKMS build actually being traced.
// NOTE(review): the maps are global, not keyed by tid, so concurrent calls
// from different threads would overwrite each other's saved values.
k:pqm_create_queue {
        printf("%s:\n", func);
        @queue_id = arg4;
        @doorbell_offset_in_process = arg5;
}
// Return probe: read through the saved values as pointers now that the
// function has finished.
kr:pqm_create_queue {
        printf("%s:\n", func);
        printf("    queue_id: 0x%x\n", *@queue_id);
        printf("    doorbell_offset_in_process: 0x%x\n", *@doorbell_offset_in_process);

}

// Entry probe: arg2 is saved as a pointer to the create-queue ioctl arguments.
k:kfd_ioctl_create_queue {
        printf("%s:\n", func);
        @args = (struct kfd_ioctl_create_queue_args *)arg2;
}

// Return probe: print the doorbell_offset field of the saved argument struct.
kr:kfd_ioctl_create_queue {
        printf("%s:\n", func);
        printf("    doorbell offset 0x%llx \n", @args->doorbell_offset);
}

// Creates one SDMA queue per (engine, queue-per-engine) slot on the default
// GPU node, then for each queue performs a copy followed by a marker write and
// verifies both, and finally destroys every queue.
TEST_F(KFDQMTest, AllSdmaQueues)
{
    TEST_START(TESTPROFILE_RUNALL)

    int gpuNode = m_NodeInfo.HsaDefaultGPUNode();
    int pageBytes = PAGE_SIZE;
    ASSERT_GE(gpuNode, 0) << "failed to get default GPU Node";

    unsigned int queueCount = m_numSdmaEngines * m_numSdmaQueuesPerEngine;

    LOG() << "Regular SDMA engines number: " << m_numSdmaEngines
          << " SDMA queues per engine: " << m_numSdmaQueuesPerEngine << std::endl;

    // The destination is twice the copy size: the second half holds the
    // marker word written after the copy.
    HsaMemoryBuffer dst(pageBytes * 2, gpuNode, false);
    HsaMemoryBuffer src(pageBytes, gpuNode, false);
    dst.Fill(0xFF);

    std::vector<SDMAQueue> queues(queueCount);

    // Phase 1: create all queues up front so they exist simultaneously.
    for (unsigned int i = 0; i < queueCount; ++i)
    {
        printf("qid: 0x%x \n", i);
        ASSERT_SUCCESS(queues[i].Create(gpuNode));
    }

    // Phase 2: exercise each queue with a distinct source pattern.
    const int markerIndex = pageBytes / 4;
    for (unsigned int i = 0; i < queueCount; ++i)
    {
        SDMAQueue &queue = queues[i];

        dst.Fill(0x0);
        src.Fill(i + 0xa0);

        queue.PlaceAndSubmitPacket(
            SDMACopyDataPacket(queue.GetFamilyId(),
                               dst.As<unsigned int *>(),
                               src.As<unsigned int *>(),
                               pageBytes));
        queue.PlaceAndSubmitPacket(
            SDMAWriteDataPacket(queue.GetFamilyId(),
                                dst.As<unsigned int *>() + markerIndex,
                                0x02020202));

        queue.Wait4PacketConsumption();

        // The marker is written after the copy; wait for it before comparing.
        EXPECT_TRUE(WaitOnValue(dst.As<unsigned int *>() + markerIndex, 0x02020202));

        EXPECT_SUCCESS(memcmp(dst.As<unsigned int *>(),
                              src.As<unsigned int *>(),
                              pageBytes));
    }

    // Phase 3: tear everything down.
    for (SDMAQueue &queue : queues)
        EXPECT_SUCCESS(queue.Destroy());

    TEST_END
}

二、结果

gl@pc:~/code/bpftrace$ sudo ./amdgpu.bt 
Attaching 6 probes...
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x100
kretprobe_trampoline:
    doorbell_off: 0x1200
kretprobe_trampoline:
    queue_id: 0x0
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000800 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x10a
kretprobe_trampoline:
    doorbell_off: 0x1214
kretprobe_trampoline:
    queue_id: 0x1
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000850 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x300
kretprobe_trampoline:
    doorbell_off: 0x1600
kretprobe_trampoline:
    queue_id: 0x2
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000001800 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x30a
kretprobe_trampoline:
    doorbell_off: 0x1614
kretprobe_trampoline:
    queue_id: 0x3
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000001850 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x101
kretprobe_trampoline:
    doorbell_off: 0x1202
kretprobe_trampoline:
    queue_id: 0x4
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000808 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x10b
kretprobe_trampoline:
    doorbell_off: 0x1216
kretprobe_trampoline:
    queue_id: 0x5
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000858 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x301
kretprobe_trampoline:
    doorbell_off: 0x1602
kretprobe_trampoline:
    queue_id: 0x6
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000001808 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x30b
kretprobe_trampoline:
    doorbell_off: 0x1616
kretprobe_trampoline:
    queue_id: 0x7
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000001858 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x102
kretprobe_trampoline:
    doorbell_off: 0x1204
kretprobe_trampoline:
    queue_id: 0x8
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000810 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x10c
kretprobe_trampoline:
    doorbell_off: 0x1218
kretprobe_trampoline:
    queue_id: 0x9
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000860 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x302
kretprobe_trampoline:
    doorbell_off: 0x1604
kretprobe_trampoline:
    queue_id: 0xa
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000001810 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x30c
kretprobe_trampoline:
    doorbell_off: 0x1618
kretprobe_trampoline:
    queue_id: 0xb
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000001860 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x103
kretprobe_trampoline:
    doorbell_off: 0x1206
kretprobe_trampoline:
    queue_id: 0xc
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000818 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x10d
kretprobe_trampoline:
    doorbell_off: 0x121a
kretprobe_trampoline:
    queue_id: 0xd
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000868 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x303
kretprobe_trampoline:
    doorbell_off: 0x1606
kretprobe_trampoline:
    queue_id: 0xe
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000001818 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x30d
kretprobe_trampoline:
    doorbell_off: 0x161a
kretprobe_trampoline:
    queue_id: 0xf
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000001868 


^C

@args: -80692590903848

@doorbell_offset_in_process: 0

@queue_id: 18446663381118647496

gl@pc:~/code/bpftrace$ 


结果分析

以0xd134c00000001868为例,bit 62 ~ 63是MMAP type(这里是0x3,即doorbell),bit 46 ~ 61是gpu_id(0x44d3);

/* Use upper bits of mmap offset to store KFD driver specific information.
 * BITS[63:62] - Encode MMAP type
 * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to
 * BITS[45:0]  - MMAP offset value
 *
 * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
 *  defines are w.r.t to PAGE_SIZE
 */
#define KFD_MMAP_TYPE_SHIFT	62
#define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_EVENTS	(0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_RESERVED_MEM	(0x1ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_MMIO	(0x0ULL << KFD_MMAP_TYPE_SHIFT)

/* gpu_id field: KFD_GPU_ID_HASH_WIDTH bits wide (that constant is defined
 * outside this excerpt), starting at bit KFD_MMAP_GPU_ID_SHIFT.
 */
#define KFD_MMAP_GPU_ID_SHIFT 46
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
				<< KFD_MMAP_GPU_ID_SHIFT)
/* Place gpu_id into its field of an mmap offset. The argument is
 * parenthesized so that a compound expression is cast and shifted as a whole.
 */
#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)(gpu_id)) << KFD_MMAP_GPU_ID_SHIFT)\
				& KFD_MMAP_GPU_ID_MASK)
/* Extract the gpu_id field from an mmap offset. The argument is parenthesized
 * so that e.g. KFD_MMAP_GET_GPU_ID(a | b) masks (a | b), not only b.
 */
#define KFD_MMAP_GET_GPU_ID(offset)    (((offset) & KFD_MMAP_GPU_ID_MASK) \
				>> KFD_MMAP_GPU_ID_SHIFT)

最低位的0x1868的计算方式如下,

	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	if (KFD_IS_SOC15(dev->device_info->asic_family))
		/* On SOC15 ASICs, include the doorbell offset within the
		 * process doorbell frame, which is 2 pages.
		 */
		args->doorbell_offset |= doorbell_offset_in_process;

其中doorbell_offset_in_process的值本应该是0x5868(即doorbell_off 0x161a × 4 byte),但是下面这段代码写明了每个process最多1024个doorbell,每个doorbell 8 byte,也就是每个process的doorbell空间为0x2000 byte,所以0x5868 & 0x1fff之后就被截断成了0x1868;

	if (q && p_doorbell_offset_in_process)
		/* Return the doorbell offset within the doorbell page
		 * to the caller so it can be passed up to user mode
		 * (in bytes).
		 * There are always 1024 doorbells per process, so in case
		 * of 8-byte doorbells, there are two doorbell pages per
		 * process.
		 */
		*p_doorbell_offset_in_process =
			(q->properties.doorbell_off * sizeof(uint32_t)) &
			(kfd_doorbell_process_slice(dev) - 1);

那么紧跟着新问题就来了,cp queue会不会跟sdma queue的doorbell 空间冲突?
我们来看一把cp queue的doorbell空间

gl@pc:~/code/bpftrace$ sudo ./amdgpu.bt 
[sudo] password for gl: 
Attaching 6 probes...
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x0
kretprobe_trampoline:
    doorbell_off: 0x1000
kretprobe_trampoline:
    queue_id: 0x0
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000000 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x1
kretprobe_trampoline:
    doorbell_off: 0x1002
kretprobe_trampoline:
    queue_id: 0x1
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000008 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x2
kretprobe_trampoline:
    doorbell_off: 0x1004
kretprobe_trampoline:
    queue_id: 0x2
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000010 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x3
kretprobe_trampoline:
    doorbell_off: 0x1006
kretprobe_trampoline:
    queue_id: 0x3
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000018 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x4
kretprobe_trampoline:
    doorbell_off: 0x1008
kretprobe_trampoline:
    queue_id: 0x4
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000020 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x5
kretprobe_trampoline:
    doorbell_off: 0x100a
kretprobe_trampoline:
    queue_id: 0x5
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000028 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x6
kretprobe_trampoline:
    doorbell_off: 0x100c
kretprobe_trampoline:
    queue_id: 0x6
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000030 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x7
kretprobe_trampoline:
    doorbell_off: 0x100e
kretprobe_trampoline:
    queue_id: 0x7
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000038 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x8
kretprobe_trampoline:
    doorbell_off: 0x1010
kretprobe_trampoline:
    queue_id: 0x8
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000040 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x9
kretprobe_trampoline:
    doorbell_off: 0x1012
kretprobe_trampoline:
    queue_id: 0x9
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000048 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0xa
kretprobe_trampoline:
    doorbell_off: 0x1014
kretprobe_trampoline:
    queue_id: 0xa
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000050 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0xb
kretprobe_trampoline:
    doorbell_off: 0x1016
kretprobe_trampoline:
    queue_id: 0xb
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000058 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0xc
kretprobe_trampoline:
    doorbell_off: 0x1018
kretprobe_trampoline:
    queue_id: 0xc
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000060 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0xd
kretprobe_trampoline:
    doorbell_off: 0x101a
kretprobe_trampoline:
    queue_id: 0xd
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000068 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0xe
kretprobe_trampoline:
    doorbell_off: 0x101c
kretprobe_trampoline:
    queue_id: 0xe
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000070 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0xf
kretprobe_trampoline:
    doorbell_off: 0x101e
kretprobe_trampoline:
    queue_id: 0xf
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000078 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x10
kretprobe_trampoline:
    doorbell_off: 0x1020
kretprobe_trampoline:
    queue_id: 0x10
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000080 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x11
kretprobe_trampoline:
    doorbell_off: 0x1022
kretprobe_trampoline:
    queue_id: 0x11
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000088 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x12
kretprobe_trampoline:
    doorbell_off: 0x1024
kretprobe_trampoline:
    queue_id: 0x12
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000090 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x13
kretprobe_trampoline:
    doorbell_off: 0x1026
kretprobe_trampoline:
    queue_id: 0x13
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c00000000098 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x14
kretprobe_trampoline:
    doorbell_off: 0x1028
kretprobe_trampoline:
    queue_id: 0x14
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c000000000a0 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x15
kretprobe_trampoline:
    doorbell_off: 0x102a
kretprobe_trampoline:
    queue_id: 0x15
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c000000000a8 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x16
kretprobe_trampoline:
    doorbell_off: 0x102c
kretprobe_trampoline:
    queue_id: 0x16
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c000000000b0 
kfd_ioctl_create_queue:
pqm_create_queue:
kfd_get_doorbell_dw_offset_in_bar:
    doorbell id: 0x17
kretprobe_trampoline:
    doorbell_off: 0x102e
kretprobe_trampoline:
    queue_id: 0x17
    doorbell_offset_in_process: 0x0
kretprobe_trampoline:
    doorbell offset 0xd134c000000000b8 

可以看到cp queue的doorbell空间是从0x0开始逐个递增的,而sdma queue的doorbell是从0x800开始的,这就决定了cp queue的doorbell最多增加到0x800,0x800÷8=0x100=256,从而决定cp queue最多只能创建256个,所以目前HIP层限定了cp queue最大个数为128。

再加一段

/*
 * Create a KFD queue through the AMDKFD_IOC_CREATE_QUEUE ioctl and report its
 * doorbell address back to the caller through QueueResource.
 *
 * Part of the body is elided in this excerpt ("// ......."), so the
 * declarations of args, q, err, gpu_id, doorbell_mmap_offset and
 * doorbell_offset, and any use of Type and Event, are not visible here.
 *
 * In the visible part: returns HSAKMT_STATUS_ERROR when the ioctl returns -1
 * or when map_doorbell() fails, and HSAKMT_STATUS_SUCCESS otherwise. On
 * success QueueResource->QueueId and QueueResource->Queue_DoorBell are set.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId,
					  HSA_QUEUE_TYPE Type,
					  HSAuint32 QueuePercentage,
					  HSA_QUEUE_PRIORITY Priority,
					  void *QueueAddress,
					  HSAuint64 QueueSizeInBytes,
					  HsaEvent *Event,
					  HsaQueueResource *QueueResource)
{
	// ....... (code elided in this excerpt)
	/* Fill in the ioctl arguments from the caller's parameters. */
	args.read_pointer_address = QueueResource->QueueRptrValue;
	args.write_pointer_address = QueueResource->QueueWptrValue;
	args.ring_base_address = (uintptr_t)QueueAddress;
	args.ring_size = QueueSizeInBytes;
	args.queue_percentage = QueuePercentage;
	/* NOTE(review): the +3 presumably rebases a priority enum that starts
	 * at -3 to a zero-based table index; confirm against the enum.
	 */
	args.queue_priority = priority_map[Priority+3];

	err = kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args);

	if (err == -1) {
		free_queue(q);
		return HSAKMT_STATUS_ERROR;
	}

	q->queue_id = args.queue_id;
	/* Debug print added for this investigation. */
	printf("leiDebug: args.doorbell_offset: 0x%llx \n", args.doorbell_offset);

	if (IS_SOC15(q->gfxv)) {
		/* On SOC15 chips, the doorbell offset within the
		 * doorbell page is included in the doorbell offset
		 * returned by KFD. This allows CP queue doorbells to be
		 * allocated dynamically (while SDMA queue doorbells are
		 * fixed) rather than based on the process queue ID.
		 *
		 * The bits below doorbells[NodeId].size are the offset inside
		 * the mapping; the remaining upper bits are the mmap offset.
		 */
		doorbell_mmap_offset = args.doorbell_offset &
			~(HSAuint64)(doorbells[NodeId].size - 1);
		doorbell_offset = args.doorbell_offset &
			(doorbells[NodeId].size - 1);
	} else {
		/* On older chips, the doorbell offset within the
		 * doorbell page is based on the queue ID.
		 */
		doorbell_mmap_offset = args.doorbell_offset;
		doorbell_offset = q->queue_id * DOORBELL_SIZE(q->gfxv);
	}

	/* Debug prints added for this investigation. */
	printf("leiDebug: doorbell_mmap_offset: 0x%lx \n", doorbell_mmap_offset);   
	printf("leiDebug: doorbell_offset: 0x%x \n", doorbell_offset);
	/*
	Sample output from one run:
	leiDebug: doorbell_mmap_offset: 0xd134c00000000000 
    leiDebug: doorbell_offset: 0x800 
	*/

	// Map the doorbell region identified by doorbell_mmap_offset
	// (0xd134c00000000000 in the run above) to obtain a virtual address.
	// Per the author's note that address ends up in
	// doorbells[NodeId].mapping; map_doorbell() itself is not shown here.
	err = map_doorbell(NodeId, gpu_id, doorbell_mmap_offset);
	if (err != HSAKMT_STATUS_SUCCESS) {
		hsaKmtDestroyQueue(q->queue_id);
		free_queue(q);
		return HSAKMT_STATUS_ERROR;
	}

	QueueResource->QueueId = PORT_VPTR_TO_UINT64(q);
	/* Doorbell address = start of the mapping + offset within it. */
	QueueResource->Queue_DoorBell = VOID_PTR_ADD(doorbells[NodeId].mapping,
						     doorbell_offset);
	/*
	Values observed in gdb for the run above (mapping + 0x800):
	(gdb) p/x QueueResource->Queue_DoorBell
	$21 = 0x7ffff7fba800
	(gdb) p/x doorbells[NodeId].mapping
	$22 = 0x7ffff7fba000
	(gdb) 

	*/

	return HSAKMT_STATUS_SUCCESS;
}

总结

2MB的doorbell空间,最多256个process,每个process最多可以分配1024个doorbell,每个doorbell占用8Byte。
前两个doorbell page(每个8KB,共0x4000 byte)是给KMD用的,所以第一个进程是从0x2200004000开始的。

[257021.463294] [2425430] amdgpu: Mapping doorbell page
                     target user address == 0x7F31DE3B6000
                     physical address    == 0x2200004000
                     vm_flags            == 0x42644BB
                     size                == 0x2000

第二个进程

[688365.466517] [953913] amdgpu: Mapping doorbell page
                     target user address == 0x7FFFF7FBE000
                     physical address    == 0x2200006000
                     vm_flags            == 0x42644BB
                     size                == 0x2000

每个process分配8192Byte(0x2000)空间,即1024(0x400)个doorbell。
其中doorbell id 0x0 ~ 0xff 是cp的地址空间,0x100 ~ 0x18f 以及它的镜像 0x300 ~ 0x38f 是保留给非cp(sdma等)使用的,见下面的log和enum定义:

[607422.170677] [1483203] amdgpu: reserved doorbell 0x100 - 0x18f
[607422.170680] [1483203] amdgpu: reserved doorbell 0x300 - 0x38f
/*
 * Doorbell index assignment for VEGA20.
 *
 * Several media entries deliberately share the same index: the UVD and VCE
 * names below are aliases of the VCN slots.
 */
typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
{
	/* Compute + GFX: indices 0..255 */
	AMDGPU_VEGA20_DOORBELL_KIQ = 0x000,
	AMDGPU_VEGA20_DOORBELL_HIQ = 0x001,
	AMDGPU_VEGA20_DOORBELL_DIQ = 0x002,
	AMDGPU_VEGA20_DOORBELL_MEC_RING0 = 0x003,
	AMDGPU_VEGA20_DOORBELL_MEC_RING1 = 0x004,
	AMDGPU_VEGA20_DOORBELL_MEC_RING2 = 0x005,
	AMDGPU_VEGA20_DOORBELL_MEC_RING3 = 0x006,
	AMDGPU_VEGA20_DOORBELL_MEC_RING4 = 0x007,
	AMDGPU_VEGA20_DOORBELL_MEC_RING5 = 0x008,
	AMDGPU_VEGA20_DOORBELL_MEC_RING6 = 0x009,
	AMDGPU_VEGA20_DOORBELL_MEC_RING7 = 0x00A,
	AMDGPU_VEGA20_DOORBELL_USERQUEUE_START = 0x00B,
	AMDGPU_VEGA20_DOORBELL_USERQUEUE_END = 0x08A,
	AMDGPU_VEGA20_DOORBELL_GFX_RING0 = 0x08B,

	/* SDMA: starts at index 256; consecutive engine bases are 10 apart */
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0 = 0x100,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1 = 0x10A,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2 = 0x114,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3 = 0x11E,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4 = 0x128,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5 = 0x132,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6 = 0x13C,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7 = 0x146,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE8 = 0x150,

	/* IH: indices 376..391 */
	AMDGPU_VEGA20_DOORBELL_IH = 0x178,

	/* MMSCH: indices 392..407.
	 * The MMSCH assignment overlaps with VCN because the two are mutually
	 * exclusive. A VCN doorbell is 32 bit wide, so two VCN rings share one
	 * QWORD entry.
	 */
	AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* VCN0 */
	AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189,
	AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A,
	AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B,

	AMDGPU_VEGA20_DOORBELL64_VCN8_9 = 0x18C, /* VCN1 */
	AMDGPU_VEGA20_DOORBELL64_VCNa_b = 0x18D,
	AMDGPU_VEGA20_DOORBELL64_VCNc_d = 0x18E,
	AMDGPU_VEGA20_DOORBELL64_VCNe_f = 0x18F,

	/* UVD rings reuse the first four VCN slots (0x188..0x18B). */
	AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = AMDGPU_VEGA20_DOORBELL64_VCN0_1,
	AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = AMDGPU_VEGA20_DOORBELL64_VCN2_3,
	AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = AMDGPU_VEGA20_DOORBELL64_VCN4_5,
	AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7 = AMDGPU_VEGA20_DOORBELL64_VCN6_7,

	/* VCE rings reuse the last four VCN slots (0x18C..0x18F). */
	AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1 = AMDGPU_VEGA20_DOORBELL64_VCN8_9,
	AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = AMDGPU_VEGA20_DOORBELL64_VCNa_b,
	AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = AMDGPU_VEGA20_DOORBELL64_VCNc_d,
	AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = AMDGPU_VEGA20_DOORBELL64_VCNe_f,

	/* Inclusive bounds of the non-CP doorbell range. */
	AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0,
	AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,

	AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F,
	AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;

bitmap init的时候会把这些reserve的doorbell(以及加上KFD_QUEUE_DOORBELL_MIRROR_OFFSET之后的镜像)在bitmap中置位,这样后面分配doorbell时就会跳过它们

/*
 * Allocate the doorbell bitmap of @qpd and pre-mark the non-CP doorbell range
 * of @dev, together with its mirror, as taken.
 *
 * Does nothing and returns 0 when KFD_IS_SOC15(dev) is false.
 * Returns 0 on success, -ENOMEM if the bitmap cannot be allocated.
 */
static int init_doorbell_bitmap(struct qcm_process_device *qpd,
			struct kfd_dev *dev)
{
	int first_reserved = dev->shared_resources.non_cp_doorbells_start;
	int last_reserved = dev->shared_resources.non_cp_doorbells_end;
	unsigned int id;

	if (!KFD_IS_SOC15(dev))
		return 0;

	qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
					     GFP_KERNEL);
	if (!qpd->doorbell_bitmap)
		return -ENOMEM;

	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
		 first_reserved, last_reserved);
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
		 first_reserved + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
		 last_reserved + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);

	/* Only the lower half of the IDs is scanned; each reserved ID in it
	 * also reserves its counterpart KFD_QUEUE_DOORBELL_MIRROR_OFFSET above.
	 */
	for (id = 0; id < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; id++) {
		if (id < first_reserved || id > last_reserved)
			continue;

		__set_bit(id, qpd->doorbell_bitmap);
		__set_bit(id + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
			  qpd->doorbell_bitmap);
	}

	return 0;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值