QEMU 9.0.90 CXL Type3 mailbox 功能实现源码详解

1. 环境

1.1 QEMU 版本:

~/work$ qemu_master/build/qemu-system-x86_64 --version
QEMU emulator version 9.0.90 (v9.1.0-rc0-112-g6d00c6f982)
Copyright (c) 2003-2024 Fabrice Bellard and the QEMU Project developers

1.2. Linux 内核版本

tag: 6.11.0
commit: d4560686726f7a357922f300fc81f5964be8df04

1.3. ndctl

tag: v79
commit: 16f45755f991f4fb6d76fec70a42992426c84234

1.4. CXL type3 模拟设备


root@localhost:/mnt/shared# cxl list -BDMu -d root -m mem0
{
  "bus":"root0",
  "provider":"ACPI.CXL",
  "decoders:root0":[
    {
      "decoder":"decoder0.0",
      "resource":"0x490000000",
      "size":"4.00 GiB (4.29 GB)",
      "interleave_ways":1,
      "max_available_extent":"4.00 GiB (4.29 GB)",
      "pmem_capable":true,
      "volatile_capable":true,
      "accelmem_capable":true,
      "qos_class":0,
      "nr_targets":1
    }
  ],
  "memdevs:root0":[
    {
      "memdev":"mem0",
      "pmem_size":"256.00 MiB (268.44 MB)",
      "ram_size":"256.00 MiB (268.44 MB)",
      "serial":"0x123456789",
      "host":"0000:0d:00.0"
    }
  ]
}

2. Qemu CXL functions

2.1 mailbox function

2.1.1 Get Log

QEMU 支持的 mailbox 命令如下; Cmd Effect 中的 0x40 (bit 6) 表示该命令以后台 (background) 方式执行

| Command                      | Combined Opcode | Cmd Effect |
|------------------------------|-----------------|------------|
| Get Event Records            | 0x100           | 0x0        |
| Clear Event Records          | 0x101           | 0x10       |
| Get Event Interrupt Policy   | 0x102           | 0x0        |
| Set Event Interrupt Policy   | 0x103           | 0x2        |
| Get FW Info                  | 0x200           | 0x0        |
| Transfer FW                  | 0x201           | 0x40       |
| Activate FW                  | 0x202           | 0x40       |
| Get Timestamp                | 0x300           | 0x0        |
| Set Timestamp                | 0x301           | 0x8        |
| Get Supported Logs           | 0x400           | 0x0        |
| Get Log                      | 0x401           | 0x0        |
| Get Supported Features       | 0x500           | 0x0        |
| Get Feature                  | 0x501           | 0x0        |
| Set Feature                  | 0x502           | 0x3e       |
| Identify Memory Device       | 0x4000          | 0x0        |
| Get Partition Info           | 0x4100          | 0x0        |
| Get LSA                      | 0x4102          | 0x0        |
| Set LSA                      | 0x4103          | 0x6        |
| Get Poison List              | 0x4300          | 0x0        |
| Inject Poison                | 0x4301          | 0x0        |
| Clear Poison                 | 0x4302          | 0x0        |
| Get Scan Media Capabilities  | 0x4303          | 0x0        |
| Scan Media                   | 0x4304          | 0x40       |
| Get Scan Media Results       | 0x4305          | 0x0        |
| Sanitize                     | 0x4400          | 0x64       |
| Get Security State           | 0x4500          | 0x0        |

Host 侧获取支持的命令:


root@localhost:/mnt/shared# ./mxcli -d /dev/cxl/mem0 -cmd get_log
Opening Device: /dev/cxl/mem0
2024-08-12 13:26:25.636 | INFO     | mxlib.mxlibpy.cmds.mailbox.mbox:send_command:158 - Mailbox cmd=0 - ret_code=0
2024-08-12 13:26:25.639 | INFO     | mxlib.mxlibpy.cmds.mailbox.mbox:send_command:158 - Mailbox cmd=0 - ret_code=0
{
    "cel": [
        {
            "opcode": "0X100",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X101",
            "cmd_effect": "0X10"
        },
        {
            "opcode": "0X102",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X103",
            "cmd_effect": "0X2"
        },
        {
            "opcode": "0X200",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X201",
            "cmd_effect": "0X40"
        },
        {
            "opcode": "0X202",
            "cmd_effect": "0X40"
        },
        {
            "opcode": "0X300",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X301",
            "cmd_effect": "0X8"
        },
        {
            "opcode": "0X400",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X401",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X500",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X501",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X502",
            "cmd_effect": "0X3E"
        },
        {
            "opcode": "0X4000",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X4100",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X4102",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X4103",
            "cmd_effect": "0X6"
        },
        {
            "opcode": "0X4300",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X4301",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X4302",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X4303",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X4304",
            "cmd_effect": "0X40"
        },
        {
            "opcode": "0X4305",
            "cmd_effect": "0X0"
        },
        {
            "opcode": "0X4400",
            "cmd_effect": "0X64"
        },
        {
            "opcode": "0X4500",
            "cmd_effect": "0X0"
        }
    ]
}


源码定义: qemu_master/qemu/hw/cxl/cxl-mailbox-utils.c


/*
 * Mailbox command dispatch table, indexed as [command set][command].
 *
 * Each entry is initialised positionally with: a name string, the handler
 * function, a numeric value, and the command-effect flags.
 * The numeric value is the field that cmd_events_get_records() reads as
 * cmd->in. NOTE(review): it looks like the expected input payload length,
 * with ~0 marking variable-length input -- confirm against the definition
 * of struct cxl_cmd.
 * The effect flags are what the host sees as "cmd_effect" in the Command
 * Effects Log (e.g. entries with CXL_MBOX_BACKGROUND_OPERATION show 0x40).
 */
static const struct cxl_cmd cxl_cmd_set[256][256] = {
    [EVENTS][GET_RECORDS] = { "EVENTS_GET_RECORDS",
        cmd_events_get_records, 1, 0 },
    [EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS",
        cmd_events_clear_records, ~0, CXL_MBOX_IMMEDIATE_LOG_CHANGE },
    [EVENTS][GET_INTERRUPT_POLICY] = { "EVENTS_GET_INTERRUPT_POLICY",
                                      cmd_events_get_interrupt_policy, 0, 0 },
    [EVENTS][SET_INTERRUPT_POLICY] = { "EVENTS_SET_INTERRUPT_POLICY",
                                      cmd_events_set_interrupt_policy,
                                      ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE },
    [FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO",
        cmd_firmware_update_get_info, 0, 0 },
    [FIRMWARE_UPDATE][TRANSFER] = { "FIRMWARE_UPDATE_TRANSFER",
        cmd_firmware_update_transfer, ~0, CXL_MBOX_BACKGROUND_OPERATION },
    [FIRMWARE_UPDATE][ACTIVATE] = { "FIRMWARE_UPDATE_ACTIVATE",
        cmd_firmware_update_activate, 2, CXL_MBOX_BACKGROUND_OPERATION },
    [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 },
    [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set,
                         8, CXL_MBOX_IMMEDIATE_POLICY_CHANGE },
    [LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported,
                              0, 0 },
    [LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 },
    [FEATURES][GET_SUPPORTED] = { "FEATURES_GET_SUPPORTED",
                                  cmd_features_get_supported, 0x8, 0 },
    [FEATURES][GET_FEATURE] = { "FEATURES_GET_FEATURE",
                                cmd_features_get_feature, 0x15, 0 },
    [FEATURES][SET_FEATURE] = { "FEATURES_SET_FEATURE",
                                cmd_features_set_feature,
                                ~0,
                                (CXL_MBOX_IMMEDIATE_CONFIG_CHANGE |
                                 CXL_MBOX_IMMEDIATE_DATA_CHANGE |
                                 CXL_MBOX_IMMEDIATE_POLICY_CHANGE |
                                 CXL_MBOX_IMMEDIATE_LOG_CHANGE |
                                 CXL_MBOX_SECURITY_STATE_CHANGE)},
    [IDENTIFY][MEMORY_DEVICE] = { "IDENTIFY_MEMORY_DEVICE",
        cmd_identify_memory_device, 0, 0 },
    [CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO",
        cmd_ccls_get_partition_info, 0, 0 },
    [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 },
    [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
        ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | CXL_MBOX_IMMEDIATE_DATA_CHANGE },
    [SANITIZE][OVERWRITE] = { "SANITIZE_OVERWRITE", cmd_sanitize_overwrite, 0,
        (CXL_MBOX_IMMEDIATE_DATA_CHANGE |
         CXL_MBOX_SECURITY_STATE_CHANGE |
         CXL_MBOX_BACKGROUND_OPERATION)},
    [PERSISTENT_MEM][GET_SECURITY_STATE] = { "GET_SECURITY_STATE",
        cmd_get_security_state, 0, 0 },
    [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST",
        cmd_media_get_poison_list, 16, 0 },
    [MEDIA_AND_POISON][INJECT_POISON] = { "MEDIA_AND_POISON_INJECT_POISON",
        cmd_media_inject_poison, 8, 0 },
    [MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON",
        cmd_media_clear_poison, 72, 0 },
    [MEDIA_AND_POISON][GET_SCAN_MEDIA_CAPABILITIES] = {
        "MEDIA_AND_POISON_GET_SCAN_MEDIA_CAPABILITIES",
        cmd_media_get_scan_media_capabilities, 16, 0 },
    [MEDIA_AND_POISON][SCAN_MEDIA] = { "MEDIA_AND_POISON_SCAN_MEDIA",
        cmd_media_scan_media, 17, CXL_MBOX_BACKGROUND_OPERATION },
    [MEDIA_AND_POISON][GET_SCAN_MEDIA_RESULTS] = {
        "MEDIA_AND_POISON_GET_SCAN_MEDIA_RESULTS",
        cmd_media_get_scan_media_results, 0, 0 },
};

2.1.2 Get Event Interrupt Policy

获取每种事件日志的中断设置
Ref CXL r3.1 Table 8-55. Get Event Interrupt Policy Output Payload

// qemu_master/qemu/hw/cxl/cxl-mailbox-utils.c
/*
 * Get Event Interrupt Policy handler.
 *
 * Writes a CXLEventInterruptPolicy into payload_out, one settings byte per
 * event log (layout: CXL r3.1 Table 8-55, Get Event Interrupt Policy Output
 * Payload). A byte is written only for a log whose irq_enabled flag is set;
 * bytes belonging to disabled logs are not touched here.
 *
 * payload_in / len_in / cmd are unused.
 * Always returns CXL_MBOX_SUCCESS and sets *len_out to the policy size.
 */
static CXLRetCode cmd_events_get_interrupt_policy(const struct cxl_cmd *cmd,
                                                  uint8_t *payload_in,
                                                  size_t len_in,
                                                  uint8_t *payload_out,
                                                  size_t *len_out,
                                                  CXLCCI *cci)
{
    CXLEventLog *logs = CXL_TYPE3(cci->d)->cxl_dstate.event_logs;
    CXLEventInterruptPolicy *out = (CXLEventInterruptPolicy *)payload_out;
    CXLEventLog *src;

    /*
     * CXL_EVENT_INT_SETTING() combines the low 4 bits of the log's vector
     * (shifted into the upper nibble) with the MSI/MSI-X interrupt mode.
     * NOTE(review): the vectors themselves are assigned elsewhere
     * (reportedly in cxl_event_init()) -- not visible from here.
     */
    src = &logs[CXL_EVENT_TYPE_INFO];
    if (src->irq_enabled) {
        out->info_settings = CXL_EVENT_INT_SETTING(src->irq_vec);
    }

    src = &logs[CXL_EVENT_TYPE_WARN];
    if (src->irq_enabled) {
        out->warn_settings = CXL_EVENT_INT_SETTING(src->irq_vec);
    }

    src = &logs[CXL_EVENT_TYPE_FAIL];
    if (src->irq_enabled) {
        out->failure_settings = CXL_EVENT_INT_SETTING(src->irq_vec);
    }

    src = &logs[CXL_EVENT_TYPE_FATAL];
    if (src->irq_enabled) {
        out->fatal_settings = CXL_EVENT_INT_SETTING(src->irq_vec);
    }

    /*
     * Dynamic Capacity borrows the same vector as info, so only the
     * interrupt mode is reported (the message-number nibble stays 0).
     */
    src = &logs[CXL_EVENT_TYPE_DYNAMIC_CAP];
    if (src->irq_enabled) {
        out->dyn_cap_settings = CXL_INT_MSI_MSIX;
    }

    *len_out = sizeof(*out);
    return CXL_MBOX_SUCCESS;
}

2.1.3 Set Event Interrupt Policy

设置中断策略, 设置对应类型的日志 irq_enabled 是否使能;

// qemu_master/qemu/hw/cxl/cxl-mailbox-utils.c
/*
 * Set Event Interrupt Policy handler.
 *
 * Reads a CXLEventInterruptPolicy from payload_in (layout: CXL r3.1
 * Table 8-56, Set Event Interrupt Policy Input Payload) and, for each event
 * log, enables its interrupt iff the requested interrupt mode is MSI/MSI-X.
 * Any other mode leaves the log's interrupt disabled; the message-number
 * bits of the input are not stored.
 *
 * The trailing Dynamic Capacity settings byte is optional: a payload that
 * is at least CXL_EVENT_INT_SETTING_MIN_LEN but shorter than the full
 * structure updates only the first four logs.
 *
 * Returns CXL_MBOX_INVALID_PAYLOAD_LENGTH when len_in is below the minimum,
 * CXL_MBOX_SUCCESS otherwise. There is never an output payload, so *len_out
 * is set to 0 on every path (previously the short-payload success path
 * returned without assigning it).
 */
static CXLRetCode cmd_events_set_interrupt_policy(const struct cxl_cmd *cmd,
                                                  uint8_t *payload_in,
                                                  size_t len_in,
                                                  uint8_t *payload_out,
                                                  size_t *len_out,
                                                  CXLCCI *cci)
{
    CXLDeviceState *cxlds = &CXL_TYPE3(cci->d)->cxl_dstate;
    CXLEventInterruptPolicy *policy;
    CXLEventLog *log;

    /* No output payload, regardless of which return is taken below. */
    *len_out = 0;

    if (len_in < CXL_EVENT_INT_SETTING_MIN_LEN) {
        return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
    }

    policy = (CXLEventInterruptPolicy *)payload_in;

    log = &cxlds->event_logs[CXL_EVENT_TYPE_INFO];
    log->irq_enabled = (policy->info_settings & CXL_EVENT_INT_MODE_MASK) ==
                        CXL_INT_MSI_MSIX;

    log = &cxlds->event_logs[CXL_EVENT_TYPE_WARN];
    log->irq_enabled = (policy->warn_settings & CXL_EVENT_INT_MODE_MASK) ==
                        CXL_INT_MSI_MSIX;

    log = &cxlds->event_logs[CXL_EVENT_TYPE_FAIL];
    log->irq_enabled = (policy->failure_settings & CXL_EVENT_INT_MODE_MASK) ==
                        CXL_INT_MSI_MSIX;

    log = &cxlds->event_logs[CXL_EVENT_TYPE_FATAL];
    log->irq_enabled = (policy->fatal_settings & CXL_EVENT_INT_MODE_MASK) ==
                        CXL_INT_MSI_MSIX;

    /*
     * DCD is optional: the Dynamic Capacity Event Log Interrupt Settings
     * byte may be absent from the input payload, in which case the dynamic
     * capacity log's irq_enabled is left unchanged.
     */
    if (len_in < sizeof(*policy)) {
        return CXL_MBOX_SUCCESS;
    }

    log = &cxlds->event_logs[CXL_EVENT_TYPE_DYNAMIC_CAP];
    log->irq_enabled = (policy->dyn_cap_settings & CXL_EVENT_INT_MODE_MASK) ==
                        CXL_INT_MSI_MSIX;

    return CXL_MBOX_SUCCESS;
}

2.1.4 Get FW Info
/* CXL r3.1 Section 8.2.9.3.1: Get FW Info (Opcode 0200h) */
// qemu_master/qemu/hw/cxl/cxl-mailbox-utils.c
/*
 * Get FW Info handler.
 *
 * Fills the 0x50-byte Get FW Info output payload (CXL r3.1 Table 8-65):
 *   - slots_supported: CXL_FW_SLOTS
 *   - slot_info: bits [2:0] = active slot, bits [5:3] = staged slot
 *   - caps: bit 0 set (online update supported)
 *   - fw_rev1 / fw_rev2: fixed revision strings, written only when the
 *     corresponding cci->fw.slot[] entry is non-zero
 *
 * Returns CXL_MBOX_INTERNAL_ERROR when any of the volatile, persistent or
 * dynamic capacity sizes is below CXL_CAPACITY_MULTIPLIER, otherwise
 * CXL_MBOX_SUCCESS with *len_out set to the payload size.
 */
static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd,
                                               uint8_t *payload_in,
                                               size_t len,
                                               uint8_t *payload_out,
                                               size_t *len_out,
                                               CXLCCI *cci)
{
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
    struct {
        uint8_t slots_supported;
        uint8_t slot_info;
        uint8_t caps;
        uint8_t rsvd[0xd];
        char fw_rev1[0x10];
        char fw_rev2[0x10];
        char fw_rev3[0x10];
        char fw_rev4[0x10];
    } QEMU_PACKED *info;
    QEMU_BUILD_BUG_ON(sizeof(*info) != 0x50);

    /*
     * All three capacities must reach the multiplier.
     * NOTE(review): as written, a device that lacks any one of the three
     * memory kinds fails this command -- confirm that is intended.
     */
    if (!(cxl_dstate->vmem_size >= CXL_CAPACITY_MULTIPLIER &&
          cxl_dstate->pmem_size >= CXL_CAPACITY_MULTIPLIER &&
          ct3d->dc.total_capacity >= CXL_CAPACITY_MULTIPLIER)) {
        return CXL_MBOX_INTERNAL_ERROR;
    }

    info = (void *)payload_out;

    info->slots_supported = CXL_FW_SLOTS;

    /* Active slot in the low three bits, staged slot in the next three. */
    info->slot_info = ((cci->fw.staged_slot & 0x7) << 3) |
                      (cci->fw.active_slot & 0x7);

    info->caps = BIT(0);  /* online update supported */

    /* Revision strings are hard-coded; an empty slot's field is not written. */
    if (cci->fw.slot[0]) {
        pstrcpy(info->fw_rev1, sizeof(info->fw_rev1), "BWFW VERSION 0");
    }
    if (cci->fw.slot[1]) {
        pstrcpy(info->fw_rev2, sizeof(info->fw_rev2), "BWFW VERSION 1");
    }

    *len_out = sizeof(*info);
    return CXL_MBOX_SUCCESS;
}

2.1.5 Transfer FW
// qemu_master/qemu/hw/cxl/cxl-mailbox-utils.c
/*
 * Transfer FW (Opcode 0201h) handler.
 *
 * Input layout: CXL r3.1 Table 8-66 (Transfer FW Input Payload) -- a
 * 0x80-byte header (action, slot, offset in 128-byte units) followed by the
 * firmware data. Supported sequences are a single FULL transfer, or
 * INIT -> CONTINUE ... -> END; ABORT cancels a partitioned transfer.
 *
 * This handler only validates the request and records context in cci->fw /
 * cci->bg; it does not copy the data itself.
 *
 * Returns:
 *   CXL_MBOX_INVALID_PAYLOAD_LENGTH  input shorter than the fixed header
 *   CXL_MBOX_SUCCESS                 ABORT handled (no background work)
 *   CXL_MBOX_INVALID_INPUT           range beyond CXL_FW_SIZE, part without
 *                                    INIT, INIT at non-zero offset, >30 s
 *                                    between parts, or unknown action
 *   CXL_MBOX_FW_XFER_IN_PROGRESS     FULL/INIT while a transfer is running
 *   CXL_MBOX_FW_XFER_OUT_OF_ORDER    part overlaps the previous part
 *   CXL_MBOX_FW_INVALID_SLOT         bad slot on FULL/END
 *   CXL_MBOX_BG_STARTED              request accepted
 */
static CXLRetCode cmd_firmware_update_transfer(const struct cxl_cmd *cmd,
                                               uint8_t *payload_in,
                                               size_t len,
                                               uint8_t *payload_out,
                                               size_t *len_out,
                                               CXLCCI *cci)
{
    struct {
        uint8_t action;
        uint8_t slot;
        uint8_t rsvd1[2];
        uint32_t offset;
        uint8_t rsvd2[0x78];
        uint8_t data[];
    } QEMU_PACKED *fw_transfer = (void *)payload_in;
    size_t offset, length;

    /*
     * The header must be fully present before any field is interpreted;
     * this also keeps "len - sizeof(header)" below from underflowing.
     */
    if (len < sizeof(*fw_transfer)) {
        return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
    }

    if (fw_transfer->action == CXL_FW_XFER_ACTION_ABORT) {
        /*
         * At this point there aren't any on-going transfers
         * running in the bg - this is serialized before this
         * call altogether. Just mark the state machine and
         * disregard any other input.
         */
        cci->fw.transferring = false;
        return CXL_MBOX_SUCCESS;
    }

    /*
     * FW packages and all parts of each package are 128-byte aligned
     * (CXL r3.1 section 8.2.9.3.2), so the payload offset counts
     * CXL_FW_XFER_ALIGNMENT-sized units. Reject out-of-range offsets before
     * scaling so the multiplication cannot wrap.
     */
    if (fw_transfer->offset > CXL_FW_SIZE / CXL_FW_XFER_ALIGNMENT) {
        return CXL_MBOX_INVALID_INPUT;
    }
    offset = (size_t)fw_transfer->offset * CXL_FW_XFER_ALIGNMENT;
    /* Bytes of firmware data that follow the fixed header. */
    length = len - sizeof(*fw_transfer);
    /* offset <= CXL_FW_SIZE here, so the subtraction cannot wrap. */
    if (length > CXL_FW_SIZE - offset) {
        return CXL_MBOX_INVALID_INPUT;
    }

    if (cci->fw.transferring) {
        /*
         * A partitioned transfer is already open (transferring is set when
         * an INIT is accepted): a new FULL or INIT is refused.
         */
        if (fw_transfer->action == CXL_FW_XFER_ACTION_FULL ||
            fw_transfer->action == CXL_FW_XFER_ACTION_INIT) {
            return CXL_MBOX_FW_XFER_IN_PROGRESS;
        }
        /*
         * Abort partitioned package transfer if over 30 secs
         * between parts. As opposed to the explicit ABORT action,
         * semantically treat this condition as an error - as
         * if a part action were passed without a previous INIT.
         */
        if (difftime(time(NULL), cci->fw.last_partxfer) > 30.0) {
            cci->fw.transferring = false;
            return CXL_MBOX_INVALID_INPUT;
        }
    } else if (fw_transfer->action == CXL_FW_XFER_ACTION_CONTINUE ||
               fw_transfer->action == CXL_FW_XFER_ACTION_END) {
        /* A part must be preceded by INIT. */
        return CXL_MBOX_INVALID_INPUT;
    }

    /*
     * allow back-to-back retransmission: an identical (offset, length)
     * pair is accepted again, anything else must not overlap the
     * previously recorded part.
     */
    if ((offset != cci->fw.prev_offset || length != cci->fw.prev_len) &&
        (fw_transfer->action == CXL_FW_XFER_ACTION_CONTINUE ||
         fw_transfer->action == CXL_FW_XFER_ACTION_END)) {
        /* verify no overlaps */
        if (offset < cci->fw.prev_offset + cci->fw.prev_len) {
            return CXL_MBOX_FW_XFER_OUT_OF_ORDER;
        }
    }

    switch (fw_transfer->action) {
    case CXL_FW_XFER_ACTION_FULL: /* ignores offset */
    case CXL_FW_XFER_ACTION_END:
        /*
         * The slot field is only checked for FULL and END. Slots are
         * numbered from 1; the target may be neither the active slot nor
         * beyond CXL_FW_SLOTS.
         */
        if (fw_transfer->slot == 0 ||
            fw_transfer->slot == cci->fw.active_slot ||
            fw_transfer->slot > CXL_FW_SLOTS) {
            return CXL_MBOX_FW_INVALID_SLOT;
        }

        /* mark the slot used upon bg completion */
        break;
    case CXL_FW_XFER_ACTION_INIT:
        /* A partitioned transfer has to start at offset 0. */
        if (offset != 0) {
            return CXL_MBOX_INVALID_INPUT;
        }

        cci->fw.transferring = true;
        cci->fw.prev_offset = offset;
        cci->fw.prev_len = length;
        break;
    case CXL_FW_XFER_ACTION_CONTINUE:
        /* Ordering was validated above; remember this part. */
        cci->fw.prev_offset = offset;
        cci->fw.prev_len = length;
        break;
    default:
        return CXL_MBOX_INVALID_INPUT;
    }

    /*
     * Simulated duration of the background operation: a FULL transfer
     * takes longer than a single part.
     * NOTE(review): units are presumably milliseconds (10 s / 2 s) --
     * confirm against the consumer of cci->bg.runtime.
     */
    if (fw_transfer->action == CXL_FW_XFER_ACTION_FULL) {
        cci->bg.runtime = 10 * 1000UL;
    } else {
        cci->bg.runtime = 2 * 1000UL;
    }
    /* keep relevant context for bg completion */
    cci->fw.curr_action = fw_transfer->action;
    cci->fw.curr_slot = fw_transfer->slot;
    *len_out = 0;

    return CXL_MBOX_BG_STARTED;
}

2.1.6 Activate FW

/* CXL r3.1 section 8.2.9.3.3: Activate FW (Opcode 0202h) */
/*
 * Activate FW handler.
 *
 * Input (CXL r3.1 Table 8-67, Activate FW Input Payload): byte 0 is the
 * action (0 = activate online, 1 = activate on the next reset), byte 1 is
 * the slot number, counted from 1.
 *
 * Returns CXL_MBOX_FW_INVALID_SLOT when the slot is 0, is the currently
 * active slot, exceeds CXL_FW_SLOTS, or holds no firmware package;
 * CXL_MBOX_INVALID_INPUT for an unknown action; CXL_MBOX_SUCCESS otherwise.
 * On success either cci->fw.active_slot (online) or cci->fw.staged_slot
 * (reset) is updated; both are reported back by Get FW Info.
 */
static CXLRetCode cmd_firmware_update_activate(const struct cxl_cmd *cmd,
                                               uint8_t *payload_in,
                                               size_t len,
                                               uint8_t *payload_out,
                                               size_t *len_out,
                                               CXLCCI *cci)
{
    struct {
        uint8_t action;
        uint8_t slot;
    } QEMU_PACKED *req = (void *)payload_in;
    QEMU_BUILD_BUG_ON(sizeof(*req) != 0x2);
    const uint8_t slot = req->slot;

    /*
     * Range checks come first so that the slot[] lookup (which ensures an
     * actual fw package is there) is only reached with a valid index.
     */
    if (slot == 0 ||
        slot == cci->fw.active_slot ||
        slot > CXL_FW_SLOTS ||
        !cci->fw.slot[slot - 1]) {
        return CXL_MBOX_FW_INVALID_SLOT;
    }

    if (req->action == 0) {
        /* online */
        cci->fw.active_slot = slot;
    } else if (req->action == 1) {
        /* reset */
        cci->fw.staged_slot = slot;
    } else {
        return CXL_MBOX_INVALID_INPUT;
    }

    return CXL_MBOX_SUCCESS;
}

2.1.7 Get Timestamp
2.1.8 Set Timestamp
// Set Timestamp 时, 设备本地时钟的当前值保存到 cxl_dstate->timestamp.last_set, host 下发的时间戳保存到 cxl_dstate->timestamp.host_set
/* CXL r3.1 Section 8.2.9.4.2: Set Timestamp (Opcode 0301h) */
/*
 * Set Timestamp handler.
 *
 * Records the host-supplied 64-bit little-endian timestamp from payload_in
 * in timestamp.host_set, together with the current QEMU_CLOCK_VIRTUAL value
 * in timestamp.last_set, and marks the timestamp as set. Get Timestamp
 * later derives the current time from these two values.
 *
 * Always returns CXL_MBOX_SUCCESS with no output payload (*len_out = 0).
 */
static CXLRetCode cmd_timestamp_set(const struct cxl_cmd *cmd,
                                    uint8_t *payload_in,
                                    size_t len_in,
                                    uint8_t *payload_out,
                                    size_t *len_out,
                                    CXLCCI *cci)
{
    CXLDeviceState *cxl_dstate = &CXL_TYPE3(cci->d)->cxl_dstate;
    uint64_t host_ts_le;

    /*
     * Copy the value out of the byte buffer instead of dereferencing it
     * through a uint64_t pointer cast, which would rely on the buffer's
     * alignment and on type punning.
     */
    memcpy(&host_ts_le, payload_in, sizeof(host_ts_le));

    cxl_dstate->timestamp.set = true;
    cxl_dstate->timestamp.last_set = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);

    cxl_dstate->timestamp.host_set = le64_to_cpu(host_ts_le);

    *len_out = 0;
    return CXL_MBOX_SUCCESS;
}

// 获取时间戳时,通过本地时间的间隔,计算 host 的当前时间戳
/* CXL r3.1 Section 8.2.9.4.1: Get Timestamp (Opcode 0300h) */
/*
 * Get Timestamp handler.
 *
 * Stores the device timestamp returned by cxl_device_get_timestamp() into
 * payload_out as a 64-bit little-endian value.
 *
 * Always returns CXL_MBOX_SUCCESS with *len_out = 8.
 */
static CXLRetCode cmd_timestamp_get(const struct cxl_cmd *cmd,
                                    uint8_t *payload_in,
                                    size_t len_in,
                                    uint8_t *payload_out,
                                    size_t *len_out,
                                    CXLCCI *cci)
{
    uint64_t now = cxl_device_get_timestamp(&CXL_TYPE3(cci->d)->cxl_dstate);

    stq_le_p(payload_out, now);
    *len_out = sizeof(now);

    return CXL_MBOX_SUCCESS;
}

/*
 * Return the device's current timestamp.
 *
 * If the host has never set a timestamp the result is 0. Otherwise it is
 * the host-provided value plus the QEMU_CLOCK_VIRTUAL time elapsed since
 * that value was recorded.
 */
uint64_t cxl_device_get_timestamp(CXLDeviceState *cxl_dstate)
{
    uint64_t now, elapsed;

    if (!cxl_dstate->timestamp.set) {
        return 0;
    }

    /* Find the delta from the last time the host set the time. */
    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    elapsed = now - cxl_dstate->timestamp.last_set;

    return cxl_dstate->timestamp.host_set + elapsed;
}

2.1.9 Get Security State
2.1.10 Get Supported Features

内核暂不支持


/* CXL r3.1 section 8.2.9.6.1: Get Supported Features (Opcode 0500h) */
/*
 * Get Supported Features handler.
 *
 * Input: a byte count the caller can accept (header included) and the index
 * of the first feature to report. Output (CXL r3.1 Table 8-96): a
 * CXLSupportedFeatureHeader followed by as many CXLSupportedFeatureEntry
 * items (Table 8-97) as fit in `count`, limited to the features known here
 * (patrol scrub and ECS).
 *
 * Returns CXL_MBOX_UNSUPPORTED when the CCI's device is not a CXL Type 3
 * device, CXL_MBOX_INVALID_INPUT when `count` cannot even hold the header
 * or start_index is out of range, CXL_MBOX_SUCCESS otherwise.
 *
 * NOTE(review): count/start_index and the output header fields are used in
 * host byte order here -- confirm whether explicit LE conversion is needed.
 */
static CXLRetCode cmd_features_get_supported(const struct cxl_cmd *cmd,
                                             uint8_t *payload_in,
                                             size_t len_in,
                                             uint8_t *payload_out,
                                             size_t *len_out,
                                             CXLCCI *cci)
{
    struct {
        uint32_t count;
        uint16_t start_index;
        uint16_t reserved;
    } QEMU_PACKED QEMU_ALIGNED(16) * get_feats_in = (void *)payload_in;

    struct {
        CXLSupportedFeatureHeader hdr;
        CXLSupportedFeatureEntry feat_entries[];
    } QEMU_PACKED QEMU_ALIGNED(16) * get_feats_out = (void *)payload_out;
    uint16_t index, req_entries;
    uint16_t entry;
    size_t max_entries;

    if (!object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) {
        return CXL_MBOX_UNSUPPORTED;
    }

    if (get_feats_in->count < sizeof(CXLSupportedFeatureHeader) ||
        get_feats_in->start_index >= CXL_FEATURE_MAX) {
        return CXL_MBOX_INVALID_INPUT;
    }

    /*
     * Work out how many entries fit in the requested byte count using a
     * wide type, and clamp to the number of remaining features *before*
     * narrowing to 16 bits. Narrowing first would let a large `count`
     * wrap to a small (or zero) entry count.
     */
    max_entries = (get_feats_in->count - sizeof(CXLSupportedFeatureHeader)) /
                  sizeof(CXLSupportedFeatureEntry);
    req_entries = MIN(max_entries,
                      (size_t)(CXL_FEATURE_MAX - get_feats_in->start_index));

    /* `index` walks the feature list, `entry` the output slots. */
    for (entry = 0, index = get_feats_in->start_index;
         entry < req_entries; index++) {
        switch (index) {
        case  CXL_FEATURE_PATROL_SCRUB:
            /* Fill supported feature entry for device patrol scrub control */
            get_feats_out->feat_entries[entry++] =
                           (struct CXLSupportedFeatureEntry) {
                .uuid = patrol_scrub_uuid,
                .feat_index = index,
                .get_feat_size = sizeof(CXLMemPatrolScrubReadAttrs),
                .set_feat_size = sizeof(CXLMemPatrolScrubWriteAttrs),
                .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE,
                .get_feat_version = CXL_MEMDEV_PS_GET_FEATURE_VERSION,
                .set_feat_version = CXL_MEMDEV_PS_SET_FEATURE_VERSION,
                .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE |
                                    CXL_FEAT_ENTRY_SFE_CEL_VALID,
            };
            break;
        case  CXL_FEATURE_ECS:
            /* Fill supported feature entry for device DDR5 ECS control */
            get_feats_out->feat_entries[entry++] =
                         (struct CXLSupportedFeatureEntry) {
                .uuid = ecs_uuid,
                .feat_index = index,
                .get_feat_size = CXL_ECS_NUM_MEDIA_FRUS *
                                    sizeof(CXLMemECSReadAttrs),
                .set_feat_size = CXL_ECS_NUM_MEDIA_FRUS *
                                    sizeof(CXLMemECSWriteAttrs),
                .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE,
                .get_feat_version = CXL_ECS_GET_FEATURE_VERSION,
                .set_feat_version = CXL_ECS_SET_FEATURE_VERSION,
                .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE |
                                    CXL_FEAT_ENTRY_SFE_CEL_VALID,
            };
            break;
        default:
            /* start_index and req_entries were bounded above. */
            __builtin_unreachable();
        }
    }

    /* Header: total features on the device and entries in this response. */
    get_feats_out->hdr.nsuppfeats_dev = CXL_FEATURE_MAX;
    get_feats_out->hdr.entries = req_entries;
    *len_out = sizeof(CXLSupportedFeatureHeader) +
                      req_entries * sizeof(CXLSupportedFeatureEntry);

    return CXL_MBOX_SUCCESS;
}

2.1.11 Get Event Records

根据事件类型,获取相应的日志

  1. cmd_events_get_records() qemu_master/qemu/hw/cxl/cxl-mailbox-utils.c

/*
 * Get Event Records handler.
 *
 * The input payload is a single byte selecting the event log. The handler
 * computes how many CXL_EVENT_RECORD_SIZE records fit into the mailbox
 * payload after the CXL_EVENT_PAYLOAD_HDR_SIZE header, caps that at 0xFFFF
 * (the record count is reported as a 16-bit value), and delegates to
 * cxl_event_get_records(), whose return code is passed through.
 *
 * Returns CXL_MBOX_INVALID_INPUT when cmd->in is smaller than one byte.
 * NOTE(review): the guard tests cmd->in (the table entry), not len_in --
 * confirm that is the intended check.
 */
static CXLRetCode cmd_events_get_records(const struct cxl_cmd *cmd,
                                         uint8_t *payload_in, size_t len_in,
                                         uint8_t *payload_out, size_t *len_out,
                                         CXLCCI *cci)
{
    CXLDeviceState *cxlds = &CXL_TYPE3(cci->d)->cxl_dstate;
    int rec_limit;

    if (cmd->in < sizeof(uint8_t)) {
        return CXL_MBOX_INVALID_INPUT;
    }

    /* Records that fit behind the output header. */
    rec_limit = (cxlds->payload_size - CXL_EVENT_PAYLOAD_HDR_SIZE) /
                CXL_EVENT_RECORD_SIZE;
    rec_limit = rec_limit > 0xFFFF ? 0xFFFF : rec_limit;

    return cxl_event_get_records(cxlds, (CXLGetEventPayload *)payload_out,
                                 payload_in[0], rec_limit, len_out);
}


/*
 * Copy up to max_recs records of the selected event log into pl.
 *
 * Output layout: CXL r3.1 Table 8-53 (Get Event Records Output Payload).
 * Records are copied from the head of the log in order and are not removed.
 * Flags set in pl->flags:
 *   - CXL_GET_EVENT_FLAG_MORE_RECORDS when the log is not empty
 *   - CXL_GET_EVENT_FLAG_OVERFLOW when events were dropped; the drop count
 *     and first/last overflow timestamps are then filled in as well
 *
 * *len receives the header size plus the size of the copied records.
 * Returns CXL_MBOX_INVALID_INPUT for log_type >= CXL_EVENT_TYPE_MAX,
 * CXL_MBOX_SUCCESS otherwise. The log's lock is held while it is read.
 */
CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl,
                                 uint8_t log_type, int max_recs,
                                 size_t *len)
{
    CXLEventLog *log;
    CXLEvent *ev;
    uint16_t copied = 0;

    if (log_type >= CXL_EVENT_TYPE_MAX) {
        return CXL_MBOX_INVALID_INPUT;
    }

    log = &cxlds->event_logs[log_type];

    QEMU_LOCK_GUARD(&log->lock);

    /* Walk the log from its head until it ends or the limit is hit. */
    ev = cxl_event_get_head(log);
    while (ev && copied < max_recs) {
        memcpy(&pl->records[copied], &ev->data, CXL_EVENT_RECORD_SIZE);
        ev = cxl_event_get_next(ev);
        copied++;
    }

    /* Tell the host to keep issuing Get Event Records. */
    if (!cxl_event_empty(log)) {
        pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS;
    }

    /* Report events that could not be stored because the log was full. */
    if (log->overflow_err_count) {
        pl->flags |= CXL_GET_EVENT_FLAG_OVERFLOW;
        pl->overflow_err_count = cpu_to_le16(log->overflow_err_count);
        pl->first_overflow_timestamp =
            cpu_to_le64(log->first_overflow_timestamp);
        pl->last_overflow_timestamp =
            cpu_to_le64(log->last_overflow_timestamp);
    }

    pl->record_count = cpu_to_le16(copied);
    *len = CXL_EVENT_PAYLOAD_HDR_SIZE + (CXL_EVENT_RECORD_SIZE * copied);

    return CXL_MBOX_SUCCESS;
}
  1. cxlds->event_logs[CXL_EVENT_TYPE_MAX] 每项表示一个事件类型
/*
 * Event log selector. The values index cxlds->event_logs[] and are the
 * log-type byte the host passes to Get/Clear Event Records.
 * CXL_EVENT_TYPE_MAX is the number of logs, not a valid log type.
 */
typedef enum CXLEventLogType {
    CXL_EVENT_TYPE_INFO          = 0,
    CXL_EVENT_TYPE_WARN          = 1,
    CXL_EVENT_TYPE_FAIL          = 2,
    CXL_EVENT_TYPE_FATAL         = 3,
    CXL_EVENT_TYPE_DYNAMIC_CAP   = 4,
    CXL_EVENT_TYPE_MAX
} CXLEventLogType;
  1. cxl_event_insert() 插入日志到相应类型的事件链表中 log->events, qemu_master/qemu/hw/cxl/cxl-events.c, 以下事件插入日志:
    1. qmp_cxl_inject_general_media_event()
    2. qmp_cxl_inject_dram_event()
    3. qmp_cxl_inject_memory_module_event()
    4. qmp_cxl_process_dynamic_capacity_prescriptive()
/*
 * Append an event record to the log selected by log_type.
 *
 * The record is copied, then stamped with the log's next handle and the
 * current device timestamp before being queued at the tail. When the log
 * already holds CXL_TEST_EVENT_OVERFLOW or more records, the event is
 * dropped and only the overflow counter and first/last overflow timestamps
 * are updated.
 *
 * Returns true only when this insertion took the log from empty to one
 * record; false for an invalid log_type, a dropped event, or an insertion
 * into a non-empty log. The log's lock is held while it is modified.
 */
bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type,
                      CXLEventRecordRaw *event)
{
    CXLEventLog *log;
    CXLEvent *rec;
    uint64_t stamp;

    if (log_type >= CXL_EVENT_TYPE_MAX) {
        return false;
    }

    stamp = cxl_device_get_timestamp(cxlds);
    log = &cxlds->event_logs[log_type];

    QEMU_LOCK_GUARD(&log->lock);

    if (cxl_event_count(log) >= CXL_TEST_EVENT_OVERFLOW) {
        /* Log is full: account for the lost event instead of storing it. */
        if (log->overflow_err_count == 0) {
            log->first_overflow_timestamp = stamp;
        }
        log->last_overflow_timestamp = stamp;
        log->overflow_err_count++;
        return false;
    }

    rec = g_new0(CXLEvent, 1);
    memcpy(&rec->data, event, sizeof(*event));

    rec->data.hdr.handle = cpu_to_le16(log->next_handle);
    rec->data.hdr.timestamp = cpu_to_le64(stamp);

    /* 0 handle is never valid: skip it when the counter wraps. */
    if (++log->next_handle == 0) {
        log->next_handle++;
    }

    QSIMPLEQ_INSERT_TAIL(&log->events, rec, node);
    cxl_event_set_status(cxlds, log_type, true);

    /* Count went from 0 to 1 */
    return cxl_event_count(log) == 1;
}
2.1.12 Clear Event Records

清除日志记录

/*
 * Clear Event Records mailbox handler.
 *
 * Validates that the input payload is long enough to hold the fixed header
 * and the nr_recs handles it announces, then hands the payload to
 * cxl_event_clear_records(). The command has no output payload.
 *
 * Returns CXL_MBOX_INVALID_PAYLOAD_LENGTH when the payload is too short,
 * otherwise whatever cxl_event_clear_records() returns.
 */
static CXLRetCode cmd_events_clear_records(const struct cxl_cmd *cmd,
                                           uint8_t *payload_in,
                                           size_t len_in,
                                           uint8_t *payload_out,
                                           size_t *len_out,
                                           CXLCCI *cci)
{
    CXLDeviceState *cxlds = &CXL_TYPE3(cci->d)->cxl_dstate;
    CXLClearEventPayload *pl = (CXLClearEventPayload *)payload_in;
    const size_t hdr_len = offsetof(CXLClearEventPayload, handle);

    /* No output payload */
    *len_out = 0;

    /*
     * nr_recs is caller-controlled: make sure both the header and every
     * handle that cxl_event_clear_records() will read were actually supplied.
     * The header check comes first so nr_recs itself is known to be present.
     */
    if (len_in < hdr_len ||
        len_in < hdr_len + sizeof(pl->handle[0]) * pl->nr_recs) {
        return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
    }

    return cxl_event_clear_records(cxlds, pl);
}


/*
 * Remove the records named in @pl from the head of the selected event log.
 *
 * The handles in @pl must match the queued records in order, starting at the
 * head of the log. If any handle is 0 or differs from the record at the same
 * position, CXL_MBOX_INVALID_INPUT is returned and nothing is removed.
 * An out-of-range log type also yields CXL_MBOX_INVALID_INPUT.
 *
 * Note: pl->clear_flags is never examined here, so a "clear all events"
 * request is not treated specially.
 */
CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds,
                                   CXLClearEventPayload *pl)
{
    uint8_t log_type = pl->event_log;
    CXLEventLog *log;
    CXLEvent *cur;
    int idx;

    /* Only log types below CXL_EVENT_TYPE_MAX index a real log */
    if (log_type >= CXL_EVENT_TYPE_MAX) {
        return CXL_MBOX_INVALID_INPUT;
    }

    log = &cxlds->event_logs[log_type];

    QEMU_LOCK_GUARD(&log->lock);

    /*
     * Must iterate the queue twice.
     * "The device shall verify the event record handles specified in the input
     * payload are in temporal order. If the device detects an older event
     * record that will not be cleared when Clear Event Records is executed,
     * the device shall return the Invalid Handle return code and shall not
     * clear any of the specified event records."
     *   -- CXL r3.1 Section 8.2.9.2.3: Clear Event Records (0101h)
     */

    /* Pass 1: verify every requested handle against the queue, in order */
    idx = 0;
    cur = cxl_event_get_head(log);
    while (cur && idx < pl->nr_recs) {
        uint16_t wanted = pl->handle[idx];

        /* NOTE: Both handles are little endian. */
        if (wanted == 0 || cur->data.hdr.handle != wanted) {
            return CXL_MBOX_INVALID_INPUT;
        }
        cur = cxl_event_get_next(cur);
        idx++;
    }

    /* Pass 2: validation succeeded, pop up to nr_recs records off the head */
    idx = 0;
    cur = cxl_event_get_head(log);
    while (cur && idx < pl->nr_recs) {
        cxl_event_delete_head(cxlds, log_type, log);
        cur = cxl_event_get_head(log);
        idx++;
    }

    return CXL_MBOX_SUCCESS;
}

2.1.13 Get Feature
2.1.14 Set Feature
2.1.15 Identify Memory Device

固定值: Fw_revision, poison_list_max_mer, inject_poison_limit, dc_event_log_size


root@localhost:/mnt/shared# ./mxcli -d /dev/cxl/mem0 -cmd=identify
Opening Device: /dev/cxl/mem0
2024-08-12 16:03:32.655 | INFO     | mxlib.mxlibpy.cmds.mailbox.mbox:send_command:158 - Mailbox cmd=0 - ret_code=0
{
    "fw_revision": "BWFW VERSION 00",
    "total_capacity": 1,
    "volatile_capacity": 1,
    "persistent_capacity": 0,
    "partition_align": 0,
    "info_event_log_size": 0,
    "warning_event_log_size": 0,
    "failure_event_log_size": 0,
    "fatal_event_log_size": 0,
    "lsa_size": 268435456,
    "poison_list_max_mer": 0,
    "inject_poison_limit": 0,
    "poison_caps": 0,
    "qos_telemetry_caps": 0
}

QEMU 源码实现:

// qemu_master/qemu/hw/cxl/cxl-mailbox-utils.c
// Abridged excerpt: the signature, the declarations of id, cxl_dstate, cvc and
// ct3d, and the return statement are not shown here.
cmd_identify_memory_device()
{
    id = (void *)payload_out;
    // Firmware revision string is hard-coded
    snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
    // Fill in the remaining fields; capacities are reported in units of
    // CXL_CAPACITY_MULTIPLIER
    stq_le_p(&id->total_capacity,
             cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
    stq_le_p(&id->persistent_capacity,
             cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
    stq_le_p(&id->volatile_capacity,
             cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER);
    stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d));
    /* 256 poison records */
    st24_le_p(id->poison_list_max_mer, 256);
    /* No limit - so limited by main poison record limit */
    stw_le_p(&id->inject_poison_limit, 0);
    stw_le_p(&id->dc_event_log_size, CXL_DC_EVENT_LOG_SIZE);
}

2.1.16 Get Partition Info
/*
 * CXL r3.1 Section 8.2.9.9.2.1: Get Partition Info (Opcode 4100h)
 *
 * Reports the active volatile and persistent capacities in units of
 * CXL_CAPACITY_MULTIPLIER (layout per CXL r3.1 Table 8-128). The "next"
 * capacities are always reported as 0, i.e. no pending partition change.
 *
 * Returns CXL_MBOX_INTERNAL_ERROR if the volatile, persistent or dynamic
 * capacity is not a multiple of CXL_CAPACITY_MULTIPLIER.
 */
static CXLRetCode cmd_ccls_get_partition_info(const struct cxl_cmd *cmd,
                                              uint8_t *payload_in,
                                              size_t len_in,
                                              uint8_t *payload_out,
                                              size_t *len_out,
                                              CXLCCI *cci)
{
    struct partition_info_pl {
        uint64_t active_vmem;
        uint64_t active_pmem;
        uint64_t next_vmem;
        uint64_t next_pmem;
    } QEMU_PACKED;
    QEMU_BUILD_BUG_ON(sizeof(struct partition_info_pl) != 0x20);

    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
    struct partition_info_pl *info = (void *)payload_out;
    bool sizes_aligned;

    /* Every capacity must be a whole number of capacity units */
    sizes_aligned =
        QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER) &&
        QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER) &&
        QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER);
    if (!sizes_aligned) {
        return CXL_MBOX_INTERNAL_ERROR;
    }

    /* Currently active partitioning */
    stq_le_p(&info->active_vmem,
             cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER);
    stq_le_p(&info->active_pmem,
             cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);

    /*
     * When both next_vmem and next_pmem are 0, there is no pending change to
     * partitioning. The "next" values would describe a configuration that
     * only takes effect on the next reset; they are fixed at 0 here.
     */
    stq_le_p(&info->next_vmem, 0);
    stq_le_p(&info->next_pmem, 0);

    *len_out = sizeof(*info);
    return CXL_MBOX_SUCCESS;
}

2.1.17 Get LSA

提供持久内存(pmem)容量的内存设备必须支持 Label Storage Area (LSA);仅提供易失性内存(volatile mem)容量的设备可以选择支持。
LSA 的大小通过 Identify Memory Device 命令获取。


/*
 * CXL r3.1 Section 8.2.9.9.2.3: Get LSA (Opcode 4102h)
 *
 * Input payload (CXL r3.1 Table 8-130):
 *   offset - byte offset within the Label Storage Area to start reading at
 *   length - number of bytes to read
 *
 * Copies the requested LSA bytes into @payload_out and sets *len_out to the
 * number of bytes returned by the device class get_lsa() hook.
 *
 * Returns CXL_MBOX_INVALID_INPUT (with *len_out = 0) when the requested range
 * extends past the end of the LSA.
 */
static CXLRetCode cmd_ccls_get_lsa(const struct cxl_cmd *cmd,
                                   uint8_t *payload_in,
                                   size_t len_in,
                                   uint8_t *payload_out,
                                   size_t *len_out,
                                   CXLCCI *cci)
{
    struct {
        uint32_t offset;
        uint32_t length;
    } QEMU_PACKED *get_lsa;
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
    uint32_t offset, length;

    get_lsa = (void *)payload_in;
    /* Payload fields are little endian regardless of host byte order */
    offset = ldl_le_p(&get_lsa->offset);
    length = ldl_le_p(&get_lsa->length);

    /*
     * Do the range check in 64 bits: both fields are guest-controlled 32-bit
     * values and their 32-bit sum could wrap around and slip past the check.
     */
    if ((uint64_t)offset + length > cvc->get_lsa_size(ct3d)) {
        *len_out = 0;
        return CXL_MBOX_INVALID_INPUT;
    }

    /* Read the stored LSA data for the requested offset and length */
    *len_out = cvc->get_lsa(ct3d, payload_out, length, offset);
    return CXL_MBOX_SUCCESS;
}


2.1.18 Set LSA

/*
 * CXL r3.1 Section 8.2.9.9.2.4: Set LSA (Opcode 4103h)
 *
 * Input payload (CXL r3.1 Table 8-132): a header holding the byte offset
 * within the Label Storage Area, followed by the data to write. @len_in
 * covers header plus data.
 *
 * Returns:
 *   CXL_MBOX_SUCCESS                 - data written, or empty payload
 *   CXL_MBOX_INVALID_PAYLOAD_LENGTH  - payload shorter than the header
 *   CXL_MBOX_INVALID_INPUT           - write would run past the end of the LSA
 * The command never produces an output payload.
 */
static CXLRetCode cmd_ccls_set_lsa(const struct cxl_cmd *cmd,
                                   uint8_t *payload_in,
                                   size_t len_in,
                                   uint8_t *payload_out,
                                   size_t *len_out,
                                   CXLCCI *cci)
{
    struct set_lsa_pl {
        uint32_t offset;
        uint32_t rsvd;
        uint8_t data[];
    } QEMU_PACKED;
    struct set_lsa_pl *set_lsa_payload = (void *)payload_in;
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
    const size_t hdr_len = offsetof(struct set_lsa_pl, data);
    uint32_t offset;

    *len_out = 0;
    /* An empty input payload is accepted as a no-op */
    if (!len_in) {
        return CXL_MBOX_SUCCESS;
    }

    /*
     * A non-empty payload must at least contain the header. Without this
     * check the offset field would be read from bytes the caller never
     * supplied and "len_in -= hdr_len" below would wrap to a huge length.
     */
    if (len_in < hdr_len) {
        return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
    }

    /* Payload fields are little endian regardless of host byte order */
    offset = ldl_le_p(&set_lsa_payload->offset);

    /* len_in still includes the header here; reject writes beyond the LSA */
    if ((uint64_t)offset + len_in > cvc->get_lsa_size(ct3d) + hdr_len) {
        return CXL_MBOX_INVALID_INPUT;
    }
    /* Strip the header: what remains is the data length to write */
    len_in -= hdr_len;

    cvc->set_lsa(ct3d, set_lsa_payload->data, len_in, offset);
    return CXL_MBOX_SUCCESS;
}

2.1.19 Get Poison List

该命令应返回一个无序列表,列出已被 poison 的位置,或者主机访问时会产生 poison 的位置。



/*
 * CXL r3.1 Section 8.2.9.9.4.1: Get Poison List (Opcode 4300h)
 *
 * This is very inefficient, but good enough for now!
 * Also the payload will always fit, so no need to handle the MORE flag and
 * make this stateful. We may want to allow longer poison lists to aid
 * testing that kernel functionality.
 *
 * Input payload (CXL r3.1 Table 8-138): start address (64-byte aligned) and
 * length in units of CXL_CACHE_LINE_SIZE. Output payload (Table 8-139): a
 * header followed by one record per poison list entry that overlaps the
 * queried range.
 *
 * Returns CXL_MBOX_INVALID_INPUT when the start address is not 64-byte
 * aligned, CXL_MBOX_SUCCESS otherwise.
 */
static CXLRetCode cmd_media_get_poison_list(const struct cxl_cmd *cmd,
                                            uint8_t *payload_in,
                                            size_t len_in,
                                            uint8_t *payload_out,
                                            size_t *len_out,
                                            CXLCCI *cci)
{
    struct get_poison_list_pl {
        uint64_t pa;
        uint64_t length;
    } QEMU_PACKED;

    struct get_poison_list_out_pl {
        uint8_t flags;
        uint8_t rsvd1;
        uint64_t overflow_timestamp;
        uint16_t count;
        uint8_t rsvd2[0x14];
        struct {
            uint64_t addr;
            uint32_t length;
            uint32_t resv;
        } QEMU_PACKED records[];
    } QEMU_PACKED;

    struct get_poison_list_pl *in = (void *)payload_in;
    struct get_poison_list_out_pl *out = (void *)payload_out;
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    uint16_t record_count = 0, i = 0;
    uint64_t query_start, query_length;
    CXLPoisonList *poison_list = &ct3d->poison_list;
    CXLPoison *ent;
    uint16_t out_pl_len;

    query_start = ldq_le_p(&in->pa);
    /*
     * 64 byte alignment required: any nonzero bit in the low 6 bits is
     * rejected.
     * NOTE(review): the original annotation says a "Restart Request" bit 0 is
     * not handled here - confirm against the spec.
     */
    if (query_start & 0x3f) {
        return CXL_MBOX_INVALID_INPUT;
    }
    /* The length field counts cache lines; convert it to bytes */
    query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE;

    /* First pass: count the entries that overlap the queried range */
    QLIST_FOREACH(ent, poison_list, node) {
        /* Check for no overlap */
        if (!ranges_overlap(ent->start, ent->length,
                            query_start, query_length)) {
            continue;
        }
        record_count++;
    }

    out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
    assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);

    /*
     * Start from a zeroed payload, as cmd_media_get_scan_media_results()
     * does. Otherwise the flags byte (which is only OR-ed into when a scan is
     * running) and every reserved field would carry whatever the buffer held
     * before. Both input fields have already been read at this point.
     */
    memset(out, 0, out_pl_len);

    /* Second pass: emit one record per overlapping entry */
    QLIST_FOREACH(ent, poison_list, node) {
        uint64_t start, stop;

        /* Check for no overlap */
        if (!ranges_overlap(ent->start, ent->length,
                            query_start, query_length)) {
            continue;
        }

        /* Deal with overlap: [start, stop) is the overlapping part */
        start = MAX(ROUND_DOWN(ent->start, 64ull), query_start);
        stop = MIN(ROUND_DOWN(ent->start, 64ull) + ent->length,
                   query_start + query_length);
        /*
         * The address is 64-byte aligned, so the low bits are free to carry
         * the error source: ent->type masked to its low three bits.
         */
        stq_le_p(&out->records[i].addr, start | (ent->type & 0x7));
        /* Media error length, in units of cache lines */
        stl_le_p(&out->records[i].length, (stop - start) / CXL_CACHE_LINE_SIZE);
        i++;
    }

    if (ct3d->poison_list_overflowed) {
        /*
         * Flags bit 1: Poison List Overflow; when set the overflow timestamp
         * field is valid.
         */
        out->flags |= (1 << 1);
        stq_le_p(&out->overflow_timestamp, ct3d->poison_list_overflow_ts);
    }
    if (scan_media_running(cci)) {
        /* Flags bit 2: a Scan Media operation is still running */
        out->flags |= (1 << 2);
    }

    stw_le_p(&out->count, record_count);
    *len_out = out_pl_len;
    return CXL_MBOX_SUCCESS;
}

2.1.20 Inject Poison

QEMU 对 poison 只做 list 管理,没有对物理地址做其他操作;
主机使用该命令向一个物理地址注入一个 poison, 当通过 CXL.mem 访问该物理地址时,设备应该返回一个 poison.
使用该命令注入的 poison,加入到 poison list 中,类型应该为 injected error.

In addition, the device shall add an appropriate poison creation event to its internal Informational Event Log,
update the Event Status register, and if configured, interrupt the host.


/*
 * CXL r3.1 Section 8.2.9.9.4.2: Inject Poison (Opcode 4301h)
 *
 * Records one cache line of injected poison at the DPA given in the input
 * payload. Only the poison list is updated.
 *
 * Returns CXL_MBOX_SUCCESS when the line is added, when it is already covered
 * by an existing entry, or when a Scan Media operation is running (the list
 * is then left unchanged). Returns CXL_MBOX_INJECT_POISON_LIMIT when the list
 * already holds CXL_POISON_LIST_LIMIT entries.
 */
static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd,
                                          uint8_t *payload_in,
                                          size_t len_in,
                                          uint8_t *payload_out,
                                          size_t *len_out,
                                          CXLCCI *cci)
{
    struct inject_poison_pl {
        uint64_t dpa;
    };
    struct inject_poison_pl *in = (void *)payload_in;
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    CXLPoisonList *poison_list = &ct3d->poison_list;
    uint64_t dpa = ldq_le_p(&in->dpa);
    CXLPoison *ent;
    CXLPoison *p;

    /* Injecting into a line that is already poisoned is not an error */
    QLIST_FOREACH(ent, poison_list, node) {
        bool starts_inside = dpa >= ent->start;
        bool ends_inside =
            dpa + CXL_CACHE_LINE_SIZE <= ent->start + ent->length;

        if (starts_inside && ends_inside) {
            return CXL_MBOX_SUCCESS;
        }
    }

    /*
     * Freeze the list if there is an on-going scan media operation.
     *
     * XXX: Spec is ambiguous - is this case considered
     * a successful return despite not adding to the list?
     */
    if (!scan_media_running(cci)) {
        /* Refuse to grow the list beyond its limit */
        if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
            return CXL_MBOX_INJECT_POISON_LIMIT;
        }

        /* One cache line of injected poison starting at the given DPA */
        p = g_new0(CXLPoison, 1);
        p->type = CXL_POISON_TYPE_INJECTED;
        p->start = dpa;
        p->length = CXL_CACHE_LINE_SIZE;

        /*
         * Possible todo: Merge with existing entry if next to it and if
         * same type
         */
        QLIST_INSERT_HEAD(poison_list, p, node);
        ct3d->poison_list_cnt++;
    }

    *len_out = 0;

    return CXL_MBOX_SUCCESS;
}

2.1.21 Clear Poison

该命令清除对应物理地址的 poison, 并将数据写入该地址;
该命令将物理地址移除 Poison List, 从没有 poison 的地址 clear poison 不报错;

/*
 * CXL r3.1 Section 8.2.9.9.4.3: Clear Poison (Opcode 4302h)
 *
 * Writes the 64 bytes supplied in the input payload to the given DPA (when
 * the device class provides set_cacheline) and removes that cache line from
 * the poison list. An entry that covers more than the cleared line is split
 * into the parts before and after it.
 *
 * Returns CXL_MBOX_INVALID_PA when the line lies beyond the static plus
 * dynamic capacity, CXL_MBOX_INTERNAL_ERROR when set_cacheline fails, and
 * CXL_MBOX_SUCCESS otherwise - including when the line was not poisoned or
 * when a Scan Media operation is running (the list is then left unchanged).
 */
static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
                                         uint8_t *payload_in,
                                         size_t len_in,
                                         uint8_t *payload_out,
                                         size_t *len_out,
                                         CXLCCI *cci)
{
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
    CXLPoisonList *poison_list = &ct3d->poison_list;
    CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
    struct clear_poison_pl {
        uint64_t dpa;
        uint8_t data[64];
    };
    CXLPoison *ent;
    uint64_t dpa;

    // Ref CXL r3.1 Table 8-142. Clear Poison Input Payload
    struct clear_poison_pl *in = (void *)payload_in;

    dpa = ldq_le_p(&in->dpa);
    // Reject a cache line that ends beyond the device's static + dynamic capacity
    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size +
        ct3d->dc.total_capacity) {
        return CXL_MBOX_INVALID_PA;
    }

    /* Clearing a region with no poison is not an error so always do so */
    if (cvc->set_cacheline) {
        // Write the caller-supplied in->data to the cache line at dpa
        if (!cvc->set_cacheline(ct3d, dpa, in->data)) {
            return CXL_MBOX_INTERNAL_ERROR;
        }
    }

    /*
     * Freeze the list if there is an on-going scan media operation.
     */
    // The data write above has already happened; only the list update is skipped
    if (scan_media_running(cci)) {
        /*
         * XXX: Spec is ambiguous - is this case considered
         * a successful return despite not removing from the list?
         */
        goto success;
    }

    QLIST_FOREACH(ent, poison_list, node) {
        /*
         * Test for contained in entry. Simpler than general case
         * as clearing 64 bytes and entries 64 byte aligned
         */
        if ((dpa >= ent->start) && (dpa < ent->start + ent->length)) {
            break;
        }
    }
    if (!ent) {
        // The address is not in the poison list; this is not an error
        goto success;
    }

    // Unlink the matching entry and decrement poison_list_cnt
    QLIST_REMOVE(ent, node);
    ct3d->poison_list_cnt--;

    // Removing 64 bytes may split the original entry into two fragments.
    // First fragment: the range from ent->start up to dpa
    if (dpa > ent->start) {
        // Reached only when the entry starts before dpa, i.e. it spans more
        // than the cleared line. Such multi-line entries presumably come from
        // qmp_cxl_inject_poison() (not shown here) - TODO confirm
        CXLPoison *frag;
        /* Cannot overflow as replacing existing entry */

        frag = g_new0(CXLPoison, 1);
        // Keep everything before dpa;
        // the part after dpa + 64 bytes is handled below
        frag->start = ent->start;
        frag->length = dpa - ent->start;
        frag->type = ent->type;
        // Insert the fragment and count it
        QLIST_INSERT_HEAD(poison_list, frag, node);
        ct3d->poison_list_cnt++;
    }

    if (dpa + CXL_CACHE_LINE_SIZE < ent->start + ent->length) {
        // Second fragment: the range after dpa + 64 bytes
        CXLPoison *frag;

        if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
            // No room for the extra fragment: flag the list as overflowed
            cxl_set_poison_list_overflowed(ct3d);
        } else {
            frag = g_new0(CXLPoison, 1);
            // Add a new entry for the tail of the original range
            frag->start = dpa + CXL_CACHE_LINE_SIZE;
            frag->length = ent->start + ent->length - frag->start;
            frag->type = ent->type;
            QLIST_INSERT_HEAD(poison_list, frag, node);
            ct3d->poison_list_cnt++;
        }
    }
    /* Any fragments have been added, free original entry */
    g_free(ent);
success:
    *len_out = 0;

    return CXL_MBOX_SUCCESS;
}

2.1.22 Get Scan Media Capabilities

This command allows the device to report capabilities and options for the Scan Media feature based on the requested range


/*
 * CXL r3.1 section 8.2.9.9.4.4: Get Scan Media Capabilities
 *
 * Input payload (CXL r3.1 Table 8-143): start address (64-byte aligned) and
 * length in units of CXL_CACHE_LINE_SIZE. Output payload (Table 8-144): the
 * estimated Scan Media runtime in milliseconds for that range.
 *
 * Returns CXL_MBOX_INVALID_INPUT for a misaligned start address and
 * CXL_MBOX_INVALID_PA when the range ends beyond static_mem_size.
 */
static CXLRetCode
cmd_media_get_scan_media_capabilities(const struct cxl_cmd *cmd,
                                      uint8_t *payload_in,
                                      size_t len_in,
                                      uint8_t *payload_out,
                                      size_t *len_out,
                                      CXLCCI *cci)
{
    struct get_scan_media_capabilities_pl {
        uint64_t pa;
        uint64_t length;
    } QEMU_PACKED;

    struct get_scan_media_capabilities_out_pl {
        uint32_t estimated_runtime_ms;
    };

    struct get_scan_media_capabilities_pl *in = (void *)payload_in;
    struct get_scan_media_capabilities_out_pl *out = (void *)payload_out;
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    uint64_t range_base = ldq_le_p(&in->pa);
    uint64_t range_bytes;

    /* 64 byte alignment required */
    if ((range_base & 0x3f) != 0) {
        return CXL_MBOX_INVALID_INPUT;
    }

    /* The length field counts cache lines; convert it to bytes */
    range_bytes = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE;

    /* The queried range must lie within the static (non-dynamic) capacity */
    if (range_base + range_bytes > ct3d->cxl_dstate.static_mem_size) {
        return CXL_MBOX_INVALID_PA;
    }

    /*
     * Just use 400 nanosecond access/read latency + 100 ns for
     * the cost of updating the poison list. For small enough
     * chunks return at least 1 ms.
     */
    stl_le_p(&out->estimated_runtime_ms,
             MAX(1, range_bytes * (0.0005L / 64)));

    *len_out = sizeof(*out);
    return CXL_MBOX_SUCCESS;
}

2.1.23 Scan Media

Scan Media 命令使设备启动对部分媒体的扫描,以查找 poison 位置或导致 poison 的位置(如果主机访问了这些地址)。
设备可能会在执行扫描后更新其 poison list,并应在 Scan Media 后台操作信号完成之前完成对 poison list 的任何更改。
如果设备在执行 Scan Media 后台操作时更新其 poison list,设备应指示 Scan Media 正在进行中。
仅当 poison list 已溢出且不再是媒体上存在的内存错误的完整列表时,主机才应使用此命令。
如果请求的范围跨越设备的易失性和持久性分区,设备可能会拒绝此命令。

如果报告内部或外部 poison 中断使能,并且 poison list 没有溢出,主机不应该使用该命令,太耗时间。


/*
 * CXL r3.1 section 8.2.9.9.4.5: Scan Media
 *
 * Validates the requested range, discards any previous scan results, moves
 * the entries of the backup poison list that intersect the range into the
 * scan results (and into the poison list while it has room), then sets the
 * background runtime and reports that a background operation has started.
 *
 * Returns CXL_MBOX_INVALID_INPUT for a misaligned start address,
 * CXL_MBOX_INVALID_PA for an out-of-range request, and CXL_MBOX_BG_STARTED
 * on success. There is no output payload.
 */
static CXLRetCode cmd_media_scan_media(const struct cxl_cmd *cmd,
                                       uint8_t *payload_in,
                                       size_t len_in,
                                       uint8_t *payload_out,
                                       size_t *len_out,
                                       CXLCCI *cci)
{
    struct scan_media_pl {
        uint64_t pa;
        uint64_t length;
        uint8_t flags;
    } QEMU_PACKED;

    struct scan_media_pl *in = (void *)payload_in;
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
    uint64_t query_start;
    uint64_t query_length;
    CXLPoison *ent, *next;

    query_start = ldq_le_p(&in->pa);
    /* 64 byte alignment required */
    if (query_start & 0x3f) {
        return CXL_MBOX_INVALID_INPUT;
    }
    // The length field counts cache lines; convert it to bytes
    query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE;

    // The range must not extend beyond the static capacity
    if (query_start + query_length > cxl_dstate->static_mem_size) {
        return CXL_MBOX_INVALID_PA;
    }

    // Range check that also takes the dynamic capacity into account.
    // NOTE(review): a range that extends beyond static_mem_size but stays
    // within static_mem_size + dc.total_capacity has already been rejected by
    // the check above, so dynamic capacity can never be scanned - this looks
    // like a bug. The original annotation says a patch has been submitted.
    if (ct3d->dc.num_regions && query_start + query_length >=
            cxl_dstate->static_mem_size + ct3d->dc.total_capacity) {
        return CXL_MBOX_INVALID_PA;
    }

    // Ref CXL r3.1 Table 8-145. Scan Media Input Payload
    // Flags bit 0 set means the device shall not generate event logs for
    // media errors; flags == 0 therefore asks for event logging, which is not
    // implemented, so only an "unimplemented" message is logged.
    if (in->flags == 0) { /* TODO */
        qemu_log_mask(LOG_UNIMP,
                      "Scan Media Event Log is unsupported\n");
    }

    /* any previous results are discarded upon a new Scan Media */
    // Any previous Scan Media results are discarded by the device upon receiving a new Scan Media command.
    QLIST_FOREACH_SAFE(ent, &ct3d->scan_media_results, node, next) {
        QLIST_REMOVE(ent, node);
        g_free(ent);
    }

    /* kill the poison list - it will be recreated */
    // Only done when the poison list has been flagged as overflowed
    if (ct3d->poison_list_overflowed) {
        // Remove and free every poison list entry, keeping the count in sync
        QLIST_FOREACH_SAFE(ent, &ct3d->poison_list, node, next) {
            QLIST_REMOVE(ent, node);
            g_free(ent);
            ct3d->poison_list_cnt--;
        }
    }

    /*
     * Scan the backup list and move corresponding entries
     * into the results list, updating the poison list
     * when possible.
     */
    // poison_list_bkp is reportedly filled by qmp_cxl_inject_poison()
    // (not shown here) - TODO confirm
    QLIST_FOREACH_SAFE(ent, &ct3d->poison_list_bkp, node, next) {
        CXLPoison *res;

        if (ent->start >= query_start + query_length ||
            ent->start + ent->length <= query_start) {
            // The entry does not intersect the requested range: skip it
            continue;
        }

        /*
         * If a Get Poison List cmd comes in while this
         * scan is being done, it will see the new complete
         * list, while setting the respective flag.
         */
        // Copy the entry into the poison list while it is below
        // CXL_POISON_LIST_LIMIT
        if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) {
            CXLPoison *p = g_new0(CXLPoison, 1);

            p->start = ent->start;
            p->length = ent->length;
            p->type = ent->type;
            // Insert the new poison list entry
            QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
            ct3d->poison_list_cnt++;
        }

        res = g_new0(CXLPoison, 1);
        res->start = ent->start;
        res->length = ent->length;
        res->type = ent->type;
        // Record the entry in the scan results
        QLIST_INSERT_HEAD(&ct3d->scan_media_results, res, node);

        // Drop the entry from poison_list_bkp: it is reported by one scan
        // only. NOTE(review): the original annotation found no spec text
        // requiring this.
        QLIST_REMOVE(ent, node);
        g_free(ent);
    }

    // Simulated background runtime: same per-byte cost as the Get Scan Media
    // Capabilities estimate, with a minimum of 1
    cci->bg.runtime = MAX(1, query_length * (0.0005L / 64));
    // No output payload
    *len_out = 0;

    return CXL_MBOX_BG_STARTED;
}

2.1.24 Get Scan Media Results

返回一个无序的 poisoned memory locations list, 如果还没有调用 scan media command,设备应该返回 Unsupported return code.
返回结果可能大于 output payload size, 此时需要多次调用该命令才能取回全部结果;设备会置位 More Media Error Records, caller 需要检查该 bit。


/*
 * CXL r3.1 section 8.2.9.9.4.6: Get Scan Media Results
 *
 * Returns as many pending scan result records as fit in one output payload
 * (CXL r3.1 Table 8-146) and removes them from the results list. When more
 * records remain, the "More Media Error Records" flag (bit 0) is set so the
 * caller knows to issue the command again.
 *
 * Returns CXL_MBOX_UNSUPPORTED when no scan has been run yet.
 */
static CXLRetCode cmd_media_get_scan_media_results(const struct cxl_cmd *cmd,
                                                   uint8_t *payload_in,
                                                   size_t len_in,
                                                   uint8_t *payload_out,
                                                   size_t *len_out,
                                                   CXLCCI *cci)
{
    struct get_scan_media_results_out_pl {
        uint64_t dpa_restart;
        uint64_t length;
        uint8_t flags;
        uint8_t rsvd1;
        uint16_t count;
        uint8_t rsvd2[0xc];
        struct {
            uint64_t addr;
            uint32_t length;
            uint32_t resv;
        } QEMU_PACKED records[];
    } QEMU_PACKED;

    struct get_scan_media_results_out_pl *out = (void *)payload_out;
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    CXLPoisonList *results = &ct3d->scan_media_results;
    CXLPoison *ent, *next;
    uint16_t pending = 0, fitting = 0, filled = 0;
    uint16_t out_pl_len;

    if (!ct3d->scan_media_hasrun) {
        return CXL_MBOX_UNSUPPORTED;
    }

    /*
     * Calculate limits, all entries are within the same address range of the
     * last scan media call.
     */
    QLIST_FOREACH(ent, results, node) {
        size_t used = sizeof(*out) + fitting * sizeof(out->records[0]);

        pending++;
        /* Count a record only while the payload still has room */
        if (used < CXL_MAILBOX_MAX_PAYLOAD_SIZE) {
            fitting++;
        }
    }

    /* Size of the payload returned by this invocation */
    out_pl_len = sizeof(*out) + fitting * sizeof(out->records[0]);
    assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);

    memset(out, 0, out_pl_len);
    QLIST_FOREACH_SAFE(ent, results, node, next) {
        uint64_t base, end;

        if (filled == fitting) {
            break;
        }

        base = ROUND_DOWN(ent->start, 64ull);
        end = base + ent->length;
        /*
         * Address is 64-byte aligned; the low three bits carry ent->type.
         * NOTE(review): the original annotation doubts that the spec asks
         * for the type to be returned here.
         */
        stq_le_p(&out->records[filled].addr, base | (ent->type & 0x7));
        /* Length in units of cache lines */
        stl_le_p(&out->records[filled].length,
                 (end - base) / CXL_CACHE_LINE_SIZE);
        filled++;

        /* consume the returning entry */
        QLIST_REMOVE(ent, node);
        g_free(ent);
    }

    if (pending > fitting) {
        out->flags = (1 << 0); /* More Media Error Records */
    }
    stw_le_p(&out->count, fitting);

    *len_out = out_pl_len;
    return CXL_MBOX_SUCCESS;
}

2.1.25 Sanitize

清理设备以安全地重新利用或停用它。
清理的具体方法取决于供应商。
清理不会重置任何内部使用情况统计信息或计数器,也不会以任何方式人为延长设备的使用寿命。
与通过更改加密密钥来擦除数据的安全擦除不同,成功的清理命令可确保没有用户数据可用(加密或其他)。

本函数只是 disable media, 并启动后台命令程序,时间根据内存大小模拟,后台对每一块内存进行清0,并抛弃所有 event records.


/*
 * CXL r3.1 Section 8.2.9.9.5.1: Sanitize (Opcode 4400h)
 *
 * Once the Sanitize command has started successfully, the device shall be
 * placed in the media disabled state. If the command fails or is interrupted
 * by a reset or power failure, it shall remain in the media disabled state
 * until a successful Sanitize command has been completed. During this state:
 *
 * 1. Memory writes to the device will have no effect, and all memory reads
 * will return random values (no user data returned, even for locations that
 * the failed Sanitize operation didn’t sanitize yet).
 *
 * 2. Mailbox commands shall still be processed in the disabled state, except
 * that commands that access Sanitized areas shall fail with the Media Disabled
 * error code.
 *
 * This handler only picks a simulated runtime from the combined volatile and
 * persistent capacity, disables the media and reports that a background
 * operation has started; it always returns CXL_MBOX_BG_STARTED.
 */
static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd,
                                         uint8_t *payload_in,
                                         size_t len_in,
                                         uint8_t *payload_out,
                                         size_t *len_out,
                                         CXLCCI *cci)
{
    /* Simulated duration per capacity bracket (upper bound inclusive) */
    static const struct {
        uint64_t max_mib;
        int secs;
    } duration_tbl[] = {
        { 512,         4 },
        { 1024,        8 },
        { 2 * 1024,    15 },
        { 4 * 1024,    30 },
        { 8 * 1024,    60 },
        { 16 * 1024,   2 * 60 },
        { 32 * 1024,   4 * 60 },
        { 64 * 1024,   8 * 60 },
        { 128 * 1024,  15 * 60 },
        { 256 * 1024,  30 * 60 },
        { 512 * 1024,  60 * 60 },
        { 1024 * 1024, 120 * 60 },
    };
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    uint64_t total_mib; /* volatile + persistent capacity, in MiB */
    int secs = 240 * 60; /* max 4 hrs */
    size_t idx;

    total_mib = (ct3d->cxl_dstate.vmem_size + ct3d->cxl_dstate.pmem_size) >> 20;

    /* First bracket that can hold the capacity decides the duration */
    for (idx = 0; idx < sizeof(duration_tbl) / sizeof(duration_tbl[0]); idx++) {
        if (total_mib <= duration_tbl[idx].max_mib) {
            secs = duration_tbl[idx].secs;
            break;
        }
    }

    /* EBUSY other bg cmds as of now */
    cci->bg.runtime = secs * 1000UL;
    *len_out = 0;

    /*
     * The media is disabled as soon as the command starts and stays disabled
     * until a Sanitize command completes successfully.
     */
    cxl_dev_disable_media(&ct3d->cxl_dstate);

    /* sanitize when done */
    return CXL_MBOX_BG_STARTED;
}

2.1.26 Get supported Log

Host 命令获取:


root@localhost:/work# ./mxcli -d /dev/cxl/mem0 -cmd=get_supported_log
Opening Device: /dev/cxl/mem0
2024-08-13 17:58:25.680 | INFO     | mxlib.mxlibpy.cmds.mailbox.mbox:send_command:158 - Mailbox cmd=0 - ret_code=0
{
    "num_log_entry": 1,
    "log": [
        {
            "log_id": "0da9c0b5-bf41-4b78-8f79-96b1623b3f17",
            "log_size": 104
        }
    ]
}

Qemu 源码实现:

/*
 * CXL r3.1 Section 8.2.9.5.1: Get Supported Logs (Opcode 0400h)
 *
 * Reports a single supported log: the Command Effects Log (CEL), identified
 * by cel_uuid, with a size of 4 bytes per CEL entry.
 *
 * Always returns CXL_MBOX_SUCCESS; *len_out is set to the fixed payload size.
 */
static CXLRetCode cmd_logs_get_supported(const struct cxl_cmd *cmd,
                                         uint8_t *payload_in,
                                         size_t len_in,
                                         uint8_t *payload_out,
                                         size_t *len_out,
                                         CXLCCI *cci)
{
    struct {
        uint16_t entries;
        uint8_t rsvd[6];
        struct {
            QemuUUID uuid;
            uint32_t size;
        } log_entries[1];
    } QEMU_PACKED *supported_logs = (void *)payload_out;
    QEMU_BUILD_BUG_ON(sizeof(*supported_logs) != 0x1c);

    /* Zero the whole payload so the reserved bytes never carry stale data */
    memset(supported_logs, 0, sizeof(*supported_logs));

    /*
     * Multi-byte payload fields are little endian; store them through the
     * explicit LE accessors like the other handlers do, so the result does
     * not depend on host byte order.
     */
    stw_le_p(&supported_logs->entries, 1);
    supported_logs->log_entries[0].uuid = cel_uuid;
    stl_le_p(&supported_logs->log_entries[0].size, 4 * cci->cel_size);

    *len_out = sizeof(*supported_logs);
    return CXL_MBOX_SUCCESS;
}

2.2 mailbox data handle

// qemu_master/qemu/hw/cxl/cxl-mailbox-utils.c
/*
 * Dispatch one CCI mailbox command.
 *
 * Looks up the handler for (@set, @cmd) in cci->cxl_cmd_set, validates the
 * input payload length, rejects a background-capable command while another
 * background operation is still pending, refuses a fixed list of commands
 * while a Type 3 device has its media disabled, and finally runs the handler.
 *
 * @len_out is reset to 0 on entry and then owned by the handler.
 * @bg_started is only written once the handler has run; it is true when a
 * background-capable command returned CXL_MBOX_BG_STARTED.
 *
 * Returns the handler's return code, or one of CXL_MBOX_UNSUPPORTED,
 * CXL_MBOX_INVALID_PAYLOAD_LENGTH, CXL_MBOX_BUSY, CXL_MBOX_MEDIA_DISABLED
 * when the command is rejected before the handler is invoked.
 */
int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd,
                            size_t len_in, uint8_t *pl_in, size_t *len_out,
                            uint8_t *pl_out, bool *bg_started)
{
    const struct cxl_cmd *entry;
    opcode_handler handler;
    uint64_t now;
    int rc;

    *len_out = 0;

    entry = &cci->cxl_cmd_set[set][cmd];
    handler = entry->handler;
    if (handler == NULL) {
        qemu_log_mask(LOG_UNIMP, "Command %04xh not implemented\n",
                      set << 8 | cmd);
        return CXL_MBOX_UNSUPPORTED;
    }

    /* An expected length of ~0 marks a variable-length input payload. */
    if (entry->in != ~0 && len_in != entry->in) {
        return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
    }

    /*
     * Only one bg command at a time: a non-zero bg.runtime means a
     * background operation is still in flight.
     */
    if (cci->bg.runtime > 0 &&
        (entry->effect & CXL_MBOX_BACKGROUND_OPERATION)) {
        return CXL_MBOX_BUSY;
    }

    /* Forbid the selected commands below while the media is disabled. */
    if (object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3) &&
        cxl_dev_media_disabled(&CXL_TYPE3(cci->d)->cxl_dstate)) {
        bool blocked = h_is_blocked_placeholder_unused_guard(0) ? false :
            false;

        blocked = handler == cmd_events_get_records ||
                  handler == cmd_ccls_get_partition_info ||
                  handler == cmd_ccls_set_lsa ||
                  handler == cmd_ccls_get_lsa ||
                  handler == cmd_logs_get_log ||
                  handler == cmd_media_get_poison_list ||
                  handler == cmd_media_inject_poison ||
                  handler == cmd_media_clear_poison ||
                  handler == cmd_sanitize_overwrite ||
                  handler == cmd_firmware_update_transfer ||
                  handler == cmd_firmware_update_activate;
        if (blocked) {
            return CXL_MBOX_MEDIA_DISABLED;
        }
    }

    /* Run the handler registered for this opcode in the command table. */
    rc = handler(entry, pl_in, len_in, pl_out, len_out, cci);

    /* A background op starts only if the command is bg-capable AND says so. */
    *bg_started = (entry->effect & CXL_MBOX_BACKGROUND_OPERATION) &&
                  rc == CXL_MBOX_BG_STARTED;
    if (!*bg_started) {
        return rc;
    }

    /* Record the background operation and arm the progress timer. */
    cci->bg.opcode = (set << 8) | cmd;
    cci->bg.complete_pct = 0;
    cci->bg.ret_code = 0;

    now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
    cci->bg.starttime = now;
    /* First progress tick fires CXL_MBOX_BG_UPDATE_FREQ ms from now. */
    timer_mod(cci->bg.timer, now + CXL_MBOX_BG_UPDATE_FREQ);

    return rc;
}


/*
 * Complete a Scan Media background operation.
 *
 * Counts the entries currently on ct3d->scan_media_results; when the poison
 * list is flagged as overflowed and that count equals poison_list_cnt, the
 * overflow flag is cleared. Always records that a scan has been run.
 */
static void __do_scan_media(CXLType3Dev *ct3d)
{
    unsigned int n_results = 0;
    CXLPoison *entry;

    QLIST_FOREACH(entry, &ct3d->scan_media_results, node) {
        n_results += 1;
    }

    /* only scan media may clear the overflow */
    if (ct3d->poison_list_overflowed) {
        if (ct3d->poison_list_cnt == n_results) {
            cxl_clear_poison_list_overflowed(ct3d);
        }
    }

    /*
     * scan media has run since last conventional reset; nothing in this
     * function ever sets the flag back to false.
     */
    ct3d->scan_media_hasrun = true;
}


/* Perform the actual device zeroing */
static void __do_sanitization(CXLType3Dev *ct3d)
{
    MemoryRegion *mr;

    // 对每一部分的内存进行清 0
    if (ct3d->hostvmem) {
        mr = host_memory_backend_get_memory(ct3d->hostvmem);
        if (mr) {
            void *hostmem = memory_region_get_ram_ptr(mr);
            memset(hostmem, 0, memory_region_size(mr));
        }
    }

    if (ct3d->hostpmem) {
        mr = host_memory_backend_get_memory(ct3d->hostpmem);
        if (mr) {
            void *hostmem = memory_region_get_ram_ptr(mr);
            memset(hostmem, 0, memory_region_size(mr));
        }
    }
    if (ct3d->lsa) {
        mr = host_memory_backend_get_memory(ct3d->lsa);
        if (mr) {
            void *lsa = memory_region_get_ram_ptr(mr);
            memset(lsa, 0, memory_region_size(mr));
        }
    }
    // 抛弃了所有事件记录
    cxl_discard_all_event_records(&ct3d->cxl_dstate);
}


/*
 * Timer callback driving the single in-flight background mailbox command.
 *
 * @opaque is the CXLCCI whose cci->bg state describes the operation.
 *
 * While the operation's runtime has not elapsed, the completion percentage
 * is refreshed and the timer is re-armed for another
 * CXL_MBOX_BG_UPDATE_FREQ ms. Once the runtime has elapsed, the per-opcode
 * completion work is done, the bg state is reset so that a new
 * background-capable command may be accepted, and the mailbox interrupt is
 * raised via MSI-X or MSI when either is enabled.
 */
static void bg_timercb(void *opaque)
{
    CXLCCI *cci = opaque;
    uint64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
    /* Absolute deadline of the operation on the virtual clock. */
    uint64_t total_time = cci->bg.starttime + cci->bg.runtime;

    assert(cci->bg.runtime > 0);

    if (now >= total_time) { /* we are done */
        cci->bg.complete_pct = 100;
        cci->bg.ret_code = CXL_MBOX_SUCCESS;

        switch (cci->bg.opcode) {
        case 0x0201: /* fw transfer */
            __do_firmware_xfer(cci);
            break;
        case 0x4400: /* sanitize */
        {
            CXLType3Dev *ct3d = CXL_TYPE3(cci->d);

            __do_sanitization(ct3d);
            /* Sanitize finished: the media becomes usable again. */
            cxl_dev_enable_media(&ct3d->cxl_dstate);
            break;
        }
        case 0x4304: /* scan media */
        {
            CXLType3Dev *ct3d = CXL_TYPE3(cci->d);

            __do_scan_media(ct3d);
            break;
        }
        default:
            __builtin_unreachable();
            break;
        }
    } else {
        /*
         * estimate only
         *
         * Progress is the elapsed share of the runtime. Dividing the
         * absolute clock value by the absolute deadline (as was done
         * before) yields a number unrelated to the actual progress.
         * now < starttime + runtime here, so the result stays below 100.
         */
        cci->bg.complete_pct =
            100 * (now - cci->bg.starttime) / cci->bg.runtime;
        timer_mod(cci->bg.timer, now + CXL_MBOX_BG_UPDATE_FREQ);
    }

    if (cci->bg.complete_pct == 100) {
        /* TODO: generalize to switch CCI */
        CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
        CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
        PCIDevice *pdev = PCI_DEVICE(cci->d);

        cci->bg.starttime = 0;
        /* registers are updated, allow new bg-capable cmds */
        cci->bg.runtime = 0;

        /* Signal completion to the host on the mailbox vector. */
        if (msix_enabled(pdev)) {
            msix_notify(pdev, cxl_dstate->mbox_msi_n);
        } else if (msi_enabled(pdev)) {
            msi_notify(pdev, cxl_dstate->mbox_msi_n);
        }
    }
}

2.3 defined but unused functions

设备有动态内存时,才会启用。

// qemu_master/qemu/hw/cxl/cxl-mailbox-utils.c
/*
 * Mailbox command table for the Dynamic Capacity Device (DCD) commands,
 * indexed as [command set][command].
 *
 * NOTE(review): struct cxl_cmd is defined outside this excerpt; the
 * positional initializers appear to be { name, handler, expected input
 * payload length, effect flags } - confirm against the struct definition.
 * The dispatcher compares len_in with cxl_cmd->in and skips that check when
 * the value is ~0, so ~0 below presumably marks a variable-length payload.
 */
static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
    /* Fixed 2-byte input: region count + starting region index. */
    [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
        cmd_dcd_get_dyn_cap_config, 2, 0 },
    /* Fixed 8-byte input: extent count + starting extent index. */
    [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
        "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
        8, 0 },
    /* Variable-length input carrying the updated extent list. */
    [DCD_CONFIG][ADD_DYN_CAP_RSP] = {
        "DCD_ADD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp,
        ~0, CXL_MBOX_IMMEDIATE_DATA_CHANGE },
    /* Variable-length input carrying the extents to release. */
    [DCD_CONFIG][RELEASE_DYN_CAP] = {
        "DCD_RELEASE_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap,
        ~0, CXL_MBOX_IMMEDIATE_DATA_CHANGE },
};

2.3.1 Get Dynamic Capacity Configuration

获取设备动态容量的配置。



/*
 * CXL r3.1 section 8.2.9.9.9.1: Get Dynamic Capacity Configuration
 * (Opcode: 4800h)
 *
 * Input (Table 8-163): the maximum number of regions to return and the index
 * of the first region requested. Output (Table 8-164): an 8-byte header,
 * one record per returned region, then a trailer with the extent/tag
 * counters placed right after the last record.
 *
 * Returns CXL_MBOX_INVALID_INPUT when the starting region index is not below
 * the number of configured regions, CXL_MBOX_SUCCESS otherwise.
 */
static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct cxl_cmd *cmd,
                                             uint8_t *payload_in,
                                             size_t len_in,
                                             uint8_t *payload_out,
                                             size_t *len_out,
                                             CXLCCI *cci)
{
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    struct {
        uint8_t region_cnt;
        uint8_t start_rid;
    } QEMU_PACKED *req = (void *)payload_in;
    struct {
        uint8_t num_regions;
        uint8_t regions_returned;
        uint8_t rsvd1[6];
        struct {
            uint64_t base;
            uint64_t decode_len;
            uint64_t region_len;
            uint64_t block_size;
            uint32_t dsmadhandle;
            uint8_t flags;
            uint8_t rsvd2[3];
        } QEMU_PACKED records[];
    } QEMU_PACKED *rsp = (void *)payload_out;
    struct {
        uint32_t num_extents_supported;
        uint32_t num_extents_available;
        uint32_t num_tags_supported;
        uint32_t num_tags_available;
    } QEMU_PACKED *trailer;
    uint8_t first_rid = req->start_rid;
    uint16_t n_records;
    uint16_t payload_len;
    uint16_t idx;

    /* Region indices are zero based, so an index equal to the count is bad. */
    if (first_rid >= ct3d->dc.num_regions) {
        return CXL_MBOX_INVALID_INPUT;
    }

    /* Never return more regions than requested, nor more than remain. */
    n_records = MIN(ct3d->dc.num_regions - first_rid, req->region_cnt);

    /* The trailer sits immediately behind the variable-length record array. */
    payload_len = sizeof(*rsp) + n_records * sizeof(rsp->records[0]);
    trailer = (void *)(payload_out + payload_len);
    payload_len += sizeof(*trailer);
    assert(payload_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);

    rsp->num_regions = ct3d->dc.num_regions;
    rsp->regions_returned = n_records;

    for (idx = 0; idx < n_records; idx++) {
        unsigned int rid = first_rid + idx;

        stq_le_p(&rsp->records[idx].base, ct3d->dc.regions[rid].base);
        /* Decode length is reported in CXL_CAPACITY_MULTIPLIER units. */
        stq_le_p(&rsp->records[idx].decode_len,
                 ct3d->dc.regions[rid].decode_len / CXL_CAPACITY_MULTIPLIER);
        stq_le_p(&rsp->records[idx].region_len, ct3d->dc.regions[rid].len);
        stq_le_p(&rsp->records[idx].block_size,
                 ct3d->dc.regions[rid].block_size);
        stl_le_p(&rsp->records[idx].dsmadhandle,
                 ct3d->dc.regions[rid].dsmadhandle);
        rsp->records[idx].flags = ct3d->dc.regions[rid].flags;
    }

    /*
     * TODO: Assign values once extents and tags are introduced
     * to use.
     * Until then the supported counts and available tags are constants;
     * only the available extent count tracks the current extent total.
     */
    stl_le_p(&trailer->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED);
    stl_le_p(&trailer->num_extents_available,
             CXL_NUM_EXTENTS_SUPPORTED - ct3d->dc.total_extent_count);
    stl_le_p(&trailer->num_tags_supported, CXL_NUM_TAGS_SUPPORTED);
    stl_le_p(&trailer->num_tags_available, CXL_NUM_TAGS_SUPPORTED);

    *len_out = payload_len;
    return CXL_MBOX_SUCCESS;
}


2.3.2 Get Dynamic Capacity Extent List

获取动态容量扩展列表。
因为扩展列表是基于DPA的,可能跨越多个 region, 所以主机应该读取完整的扩展列表。



/*
 * CXL r3.1 section 8.2.9.9.9.2:
 * Get Dynamic Capacity Extent List (Opcode 4801h)
 *
 * Input (Table 8-166): the maximum number of extents to return and the index
 * of the first extent requested. Output (Table 8-167): the number of extents
 * returned, the total extent count, the list generation number, and one
 * record (Table 8-51) per returned extent.
 *
 * The device may return fewer extents than requested (the count is clamped
 * to what remains in the list and to what fits in one mailbox payload) but
 * never more. The host uses the returned count together with the total to
 * tell whether it has fetched the whole list.
 *
 * Returns CXL_MBOX_INVALID_INPUT when the starting extent index is greater
 * than the total extent count, CXL_MBOX_SUCCESS otherwise.
 */
static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd,
                                               uint8_t *payload_in,
                                               size_t len_in,
                                               uint8_t *payload_out,
                                               size_t *len_out,
                                               CXLCCI *cci)
{
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    struct {
        uint32_t extent_cnt;
        uint32_t start_extent_id;
    } QEMU_PACKED *in = (void *)payload_in;
    struct {
        uint32_t count;
        uint32_t total_extents;
        uint32_t generation_num;
        uint8_t rsvd[4];
        CXLDCExtentRaw records[];
    } QEMU_PACKED *out = (void *)payload_out;
    uint32_t start_extent_id = in->start_extent_id;
    CXLDCExtentList *extent_list = &ct3d->dc.extents;
    /*
     * 32-bit counters: the request fields are 32 bits wide, so narrower
     * locals would silently truncate large values before clamping.
     */
    uint32_t record_count, max_records;
    uint32_t i = 0, record_done = 0;
    size_t out_pl_len;
    CXLDCExtent *ent;

    /* A starting index beyond the total extent count is invalid input. */
    if (start_extent_id > ct3d->dc.total_extent_count) {
        return CXL_MBOX_INVALID_INPUT;
    }

    /* Clamp to what was asked for, what is left, and what fits. */
    record_count = MIN(in->extent_cnt,
                       ct3d->dc.total_extent_count - start_extent_id);
    max_records = (CXL_MAILBOX_MAX_PAYLOAD_SIZE - sizeof(*out)) /
                  sizeof(out->records[0]);
    record_count = MIN(record_count, max_records);
    out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);

    stl_le_p(&out->count, record_count);
    stl_le_p(&out->total_extents, ct3d->dc.total_extent_count);
    stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq);

    /*
     * With a resulting count of 0 only the header (total extent count and
     * generation number) is returned, without any extent data.
     */
    if (record_count > 0) {
        QTAILQ_FOREACH(ent, extent_list, node) {
            CXLDCExtentRaw *out_rec;

            /* Skip the extents in front of the requested start index. */
            if (i++ < start_extent_id) {
                continue;
            }

            /*
             * Each extent goes into its own output slot; reusing a fixed
             * pointer here would make every extent overwrite records[0].
             */
            out_rec = &out->records[record_done];
            stq_le_p(&out_rec->start_dpa, ent->start_dpa);
            stq_le_p(&out_rec->len, ent->len);
            memcpy(&out_rec->tag, ent->tag, 0x10);
            stw_le_p(&out_rec->shared_seq, ent->shared_seq);

            record_done++;
            if (record_done == record_count) {
                break;
            }
        }
    }

    *len_out = out_pl_len;
    return CXL_MBOX_SUCCESS;
}

2.3.3 Add Dynamic Capacity Response

/*
 * CXL r3.1 section 8.2.9.9.9.3: Add Dynamic Capacity Response (Opcode 4802h)
 * An extent is added to the extent list and becomes usable only after the
 * response is processed successfully.
 *
 * Input (Table 8-168) is a variable-length list of updated extents. An empty
 * list is valid: it only drops the first group from the pending list.
 *
 * Returns:
 *   CXL_MBOX_INVALID_PAYLOAD_LENGTH - @len_in is too short for the header or
 *                                     for the number of entries it announces;
 *   CXL_MBOX_RESOURCES_EXHAUSTED    - accepting the extents would exceed
 *                                     CXL_NUM_EXTENTS_SUPPORTED;
 *   whatever cxl_detect_malformed_extent_list() or the dry run report on
 *   failure; CXL_MBOX_SUCCESS otherwise.
 */
static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
                                          uint8_t *payload_in,
                                          size_t len_in,
                                          uint8_t *payload_out,
                                          size_t *len_out,
                                          CXLCCI *cci)
{
    CXLUpdateDCExtentListInPl *in = (void *)payload_in;
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    CXLDCExtentList *extent_list = &ct3d->dc.extents;
    uint32_t i;
    uint64_t dpa, len;
    CXLRetCode ret;

    /*
     * The command is variable length, so the dispatcher does not check
     * len_in for us: make sure the header is there before reading it.
     */
    if (len_in < sizeof(*in)) {
        return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
    }

    /* An updated extent list size of 0 is allowed. */
    if (in->num_entries_updated == 0) {
        cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending);
        return CXL_MBOX_SUCCESS;
    }

    /*
     * Every announced entry must be backed by payload bytes. Written as a
     * division so that the check itself cannot overflow.
     */
    if ((len_in - sizeof(*in)) / sizeof(in->updated_entries[0]) <
        in->num_entries_updated) {
        return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
    }

    /*
     * Adding extents causes exceeding device's extent tracking ability.
     * Done in 64 bits so the sum cannot wrap around.
     */
    if ((uint64_t)in->num_entries_updated + ct3d->dc.total_extent_count >
        CXL_NUM_EXTENTS_SUPPORTED) {
        return CXL_MBOX_RESOURCES_EXHAUSTED;
    }

    ret = cxl_detect_malformed_extent_list(ct3d, in);
    if (ret != CXL_MBOX_SUCCESS) {
        return ret;
    }

    /* Validate the whole request first so nothing is applied partially. */
    ret = cxl_dcd_add_dyn_cap_rsp_dry_run(ct3d, in);
    if (ret != CXL_MBOX_SUCCESS) {
        return ret;
    }

    for (i = 0; i < in->num_entries_updated; i++) {
        dpa = in->updated_entries[i].start_dpa;
        len = in->updated_entries[i].len;

        cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
        ct3d->dc.total_extent_count += 1;
        ct3_set_region_block_backed(ct3d, dpa, len);
    }
    /* Remove the first extent group in the pending list */
    cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending);

    return CXL_MBOX_SUCCESS;
}

2.3.4 Release Dynamic Capacity

/*
 * CXL r3.1 section 8.2.9.9.9.4: Release Dynamic Capacity (Opcode 4803h)
 *
 * Input is a variable-length list of extents to release. The release is
 * first simulated; only if that succeeds is the accepted extent list
 * replaced by the resulting list and the extent count updated.
 *
 * Returns:
 *   CXL_MBOX_INVALID_PAYLOAD_LENGTH - @len_in is too short for the header or
 *                                     for the number of entries it announces;
 *   CXL_MBOX_INVALID_INPUT          - the list of extents is empty;
 *   whatever cxl_detect_malformed_extent_list() or the dry run report on
 *   failure; CXL_MBOX_SUCCESS otherwise.
 */
static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
                                          uint8_t *payload_in,
                                          size_t len_in,
                                          uint8_t *payload_out,
                                          size_t *len_out,
                                          CXLCCI *cci)
{
    CXLUpdateDCExtentListInPl *in = (void *)payload_in;
    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
    CXLDCExtentList updated_list;
    CXLDCExtent *ent, *ent_next;
    uint32_t updated_list_size;
    CXLRetCode ret;

    /*
     * The command is variable length, so the dispatcher does not check
     * len_in for us: make sure the header is there before reading it.
     */
    if (len_in < sizeof(*in)) {
        return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
    }

    if (in->num_entries_updated == 0) {
        return CXL_MBOX_INVALID_INPUT;
    }

    /*
     * Every announced entry must be backed by payload bytes. Written as a
     * division so that the check itself cannot overflow.
     */
    if ((len_in - sizeof(*in)) / sizeof(in->updated_entries[0]) <
        in->num_entries_updated) {
        return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
    }

    ret = cxl_detect_malformed_extent_list(ct3d, in);
    if (ret != CXL_MBOX_SUCCESS) {
        return ret;
    }

    ret = cxl_dc_extent_release_dry_run(ct3d, in, &updated_list,
                                        &updated_list_size);
    if (ret != CXL_MBOX_SUCCESS) {
        return ret;
    }

    /*
     * If the dry run release passes, the returned updated_list will
     * be the updated extent list and we just need to clear the extents
     * in the accepted list and copy extents in the updated_list to accepted
     * list and update the extent count;
     */
    QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) {
        ct3_clear_region_block_backed(ct3d, ent->start_dpa, ent->len);
        cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
    }
    copy_extent_list(&ct3d->dc.extents, &updated_list);
    /* Re-mark the surviving extents as backed and drain the temporary list. */
    QTAILQ_FOREACH_SAFE(ent, &updated_list, node, ent_next) {
        ct3_set_region_block_backed(ct3d, ent->start_dpa, ent->len);
        cxl_remove_extent_from_extent_list(&updated_list, ent);
    }
    /* The accepted list now holds exactly the extents the dry run produced. */
    ct3d->dc.total_extent_count = updated_list_size;

    return CXL_MBOX_SUCCESS;
}


  • 20
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Call Me Gavyn

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值