文章目录
前言
CXL 是一个比较新的技术,内核版本迭代太快,跟不上节奏,固定一个版本是不行了。
在阅读之前,希望读者能有一定的 PCIe/CXL 基础知识。我精力有限,无法把所有知识点都讲得很详细,因此需要一定的基础才能理解。同时,希望在学习的过程中,手边能有 PCIe Spec 以及 CXL 2.0/3.1 Spec,以便随时查看;当然,我也会尽量把重点部分截图放在博文中。
最后,如果有问题请留言讨论。
Ref
《PCI_Express_Base_5.0r1.0》
《CXL Specification_rev2p0_ver1p0_2020Oct26》
《CXL-3.1-Specification》
正文
1. cxl_enumerate_cmds()
找出设备支持的命令,并置位相应的标志位:
1. 如果在 CEL 中找到对应的 mem_command,就在位图 mds->enabled_cmds 中置位
2. 如果是 poison 命令,还要在 mds->poison.enabled_cmds 中置位
3. 如果是安全相关的命令,还要在 mds->security.enabled_cmds 中置位
/**
 * cxl_enumerate_cmds() - Enumerate commands for a device.
 * @mds: The driver data for the operation
 *
 * CXL devices have optional support for certain commands. This function will
 * determine the set of supported commands for the hardware and update the
 * enabled_cmds bitmap in the @mds.
 *
 * The supported-logs list is scanned for the Command Effects Log (CEL). When
 * a CEL entry is found its contents are fetched and walked, and every command
 * flagged CXL_CMD_FLAG_FORCE_ENABLE is enabled regardless of what the CEL
 * reported.
 *
 * Return: 0 if a CEL was found and processed, -ENOENT if the device lists no
 * CEL, -ENOMEM if the log buffer cannot be allocated, or the error returned by
 * cxl_get_gsl() / cxl_xfer_log().
 */
int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
{
	struct cxl_mbox_get_supported_logs *gsl;
	struct device *dev = mds->cxlds.dev;
	struct cxl_mem_command *cmd;
	int i, rc;

	/*
	 * Fetch the list of logs the device supports. Per the original
	 * annotation this issues the Get Supported Logs mailbox command
	 * (CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400; CXL 2.0 Table 152), which
	 * returns one entry per log, identified by UUID, with its maximum size.
	 */
	gsl = cxl_get_gsl(mds);
	if (IS_ERR(gsl))
		return PTR_ERR(gsl);

	/* Stays -ENOENT unless a CEL entry is found below. */
	rc = -ENOENT;

	/* Fields in the payload are little-endian; convert before use. */
	for (i = 0; i < le16_to_cpu(gsl->entries); i++) {
		/* Entry layout: CXL 3.1 Table 8-71 (Supported Log Entry). */
		u32 size = le32_to_cpu(gsl->entry[i].size);
		uuid_t uuid = gsl->entry[i].uuid;
		u8 *log;

		/* size is a u32, so print it unsigned. */
		dev_dbg(dev, "Found LOG type %pU of size %u\n", &uuid, size);

		/* Only the Command Effects Log is of interest here. */
		if (!uuid_equal(&uuid, &log_uuid[CEL_UUID]))
			continue;

		/*
		 * The CEL is a variable-length log reporting every command the
		 * CCI supports (vendor-defined ones included) and its effects,
		 * so size the buffer from the advertised log size.
		 */
		log = kvmalloc(size, GFP_KERNEL);
		if (!log) {
			rc = -ENOMEM;
			goto out;
		}

		/*
		 * Read the log contents. Per the original annotation this uses
		 * Get Log (CXL_MBOX_OP_GET_LOG = 0x0401; input payload in CXL 3.1
		 * Table 8-72) starting at offset 0 and advancing the offset
		 * until the whole log is read; a device is expected to answer
		 * Invalid Input if offset/length run past the log size.
		 */
		rc = cxl_xfer_log(mds, &uuid, &size, log);
		if (rc) {
			kvfree(log);
			goto out;
		}

		/*
		 * Walk the CEL entries (4 bytes each; CXL 3.1 Table 8-75). Per
		 * the original annotation, each recognised command is enabled
		 * in mds->enabled_cmds, and poison/security commands are also
		 * enabled in their own per-feature bitmaps.
		 */
		cxl_walk_cel(mds, size, log);
		kvfree(log);

		/*
		 * In case CEL was bogus, enable some default commands: those
		 * defined with CXL_CMD_FLAG_FORCE_ENABLE (per the original
		 * annotation: IDENTIFY, GET_SUPPORTED_LOGS and GET_LOG).
		 */
		cxl_for_each_cmd(cmd)
			if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
				set_bit(cmd->info.id, mds->enabled_cmds);

		/* Found the required CEL */
		rc = 0;
	}
out:
	kvfree(gsl);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
2. cxl_mem_get_event_records()
从设备获取事件记录。status 是一个位掩码,指明需要读取哪些级别的事件日志;函数按 Fatal、Fail、Warn、Info 的顺序依次读取被置位的日志。
/*
 * cxl_mem_get_event_records() - Read the event logs selected by @status.
 * @mds: memdev state whose event logs are read
 * @status: bitmask of CXLDEV_EVENT_STATUS_* bits selecting the logs to read
 *
 * Logs are visited in the fixed order Fatal, Fail, Warn, Info; a log is read
 * only when its bit is set in @status.
 */
void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status)
{
	/* Status bit and matching log type share the same index. */
	const u32 pending_bit[] = {
		CXLDEV_EVENT_STATUS_FATAL,
		CXLDEV_EVENT_STATUS_FAIL,
		CXLDEV_EVENT_STATUS_WARN,
		CXLDEV_EVENT_STATUS_INFO,
	};
	const int log_id[] = {
		CXL_EVENT_TYPE_FATAL,
		CXL_EVENT_TYPE_FAIL,
		CXL_EVENT_TYPE_WARN,
		CXL_EVENT_TYPE_INFO,
	};
	unsigned int slot;

	dev_dbg(mds->cxlds.dev, "Reading event logs: %x\n", status);

	for (slot = 0; slot < sizeof(pending_bit) / sizeof(pending_bit[0]); slot++) {
		if (status & pending_bit[slot])
			cxl_mem_get_records_log(mds, log_id[slot]);
	}
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
3. cxl_mem_get_records_log()
/*
 * cxl_mem_get_records_log() - Read, trace and clear the records of one event log.
 * @mds: memdev state; supplies the event buffer, its lock and payload_size
 * @type: the event log to read
 *
 * Repeats "Get Event Records -> trace each record -> Clear Event Records"
 * until the device reports zero records or a mailbox command fails.
 * mds->event.log_lock is held for the whole sequence because every iteration
 * reuses the single buffer mds->event.buf.
 */
static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
enum cxl_event_log_type type)
{
struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
struct device *dev = mds->cxlds.dev;
struct cxl_get_event_payload *payload;
// The command's input payload is the log type, narrowed to one byte.
u8 log_type = type;
u16 nr_rec;
mutex_lock(&mds->event.log_lock);
payload = mds->event.buf;
do {
int rc, i;
// min_out covers only the fixed header (zero records), so a reply
// carrying no records still satisfies the minimum output size.
struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
.payload_in = &log_type,
.size_in = sizeof(log_type),
.payload_out = payload,
.size_out = mds->payload_size,
.min_out = struct_size(payload, records, 0),
};
// CXL_MBOX_OP_GET_EVENT_RECORD = 0x0100,
// CXL 3.1r 8.2.9.2.2 Get Event Records
// Fetch the next batch of event records from the device.
rc = cxl_internal_send_cmd(mds, &mbox_cmd);
if (rc) {
dev_err_ratelimited(dev,
"Event log '%d': Failed to query event records : %d",
type, rc);
break;
}
// Reply layout: see the "Get Event Records Output Payload" figure.
nr_rec = le16_to_cpu(payload->record_count);
// Number of records in this reply; zero means the log is drained.
if (!nr_rec)
break;
for (i = 0; i < nr_rec; i++)
// Emit a trace event for each returned record.
__cxl_event_trace_record(cxlmd, type,
&payload->records[i]);
if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
// Per the original annotation (CXL spec): the device sets this flag when
// events occurred that it could not log.
// First Overflow Event Timestamp and Last Overflow Event Timestamp fields shall be valid
// The flag stays set until the host reads or clears records.
// overflow_err_count: number of events the device detected but did not
// log because of the overflow.
// first_overflow_timestamp / last_overflow_timestamp: time of the first
// and last overflowed event, presumably counted from 1-Jan-1970 (the
// original note reads "1-jan-190" - confirm against the spec); 0 when
// the device has no valid timestamp.
trace_cxl_overflow(cxlmd, type, payload);
rc = cxl_clear_event_record(mds, type, payload);
// Clear the records just traced (CXL_MBOX_OP_CLEAR_EVENT_RECORD) so the
// next Get Event Records returns the following batch.
if (rc) {
dev_err_ratelimited(dev,
"Event log '%d': Failed to clear events : %d",
type, rc);
break;
}
} while (nr_rec);
mutex_unlock(&mds->event.log_lock);
}
Get Event Records Output Payload
4. __cxl_event_trace_record() / cxl_event_trace_record()
根据记录中的 UUID 处理不同的事件, UUID Ref CXL 3.1r Table 8-43. Common Event Record Format
static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd,
enum cxl_event_log_type type,
struct cxl_event_record_raw *record)
{
enum cxl_event_type ev_type = CXL_CPER_EVENT_GENERIC;
const uuid_t *uuid = &record->id;
if (uuid_equal(uuid, &CXL_EVENT_GEN_MEDIA_UUID))
ev_type = CXL_CPER_EVENT_GEN_MEDIA;
else if (uuid_equal(uuid, &CXL_EVENT_DRAM_UUID))
ev_type = CXL_CPER_EVENT_DRAM;
else if (uuid_equal(uuid, &CXL_EVENT_MEM_MODULE_UUID))
ev_type = CXL_CPER_EVENT_MEM_MODULE;
cxl_event_trace_record(cxlmd, type, ev_type, uuid, &record->event);
}
/*
 * cxl_event_trace_record() - Emit the trace event matching an event record.
 * @cxlmd: memdev the event belongs to
 * @type: event log the record was read from
 * @event_type: classified record type (CXL_CPER_EVENT_*)
 * @uuid: record UUID; only passed on for generic events
 * @evt: the event payload, viewed through the member matching @event_type
 *
 * Memory Module and generic events are traced directly and the function
 * returns. General Media and DRAM events are additionally annotated with the
 * region and HPA looked up from the DPA in the record; that lookup is skipped
 * entirely when neither of those two tracepoints is enabled.
 */
void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
enum cxl_event_log_type type,
enum cxl_event_type event_type,
const uuid_t *uuid, union cxl_event *evt)
{
if (event_type == CXL_CPER_EVENT_MEM_MODULE) {
// Memory Module Event Record: no address translation needed.
trace_cxl_memory_module(cxlmd, type, &evt->mem_module);
return;
}
if (event_type == CXL_CPER_EVENT_GENERIC) {
// Unrecognised record: trace it along with its UUID.
trace_cxl_generic_event(cxlmd, type, uuid, &evt->generic);
return;
}
// Only pay for the locks and the lookup if someone is listening.
if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) {
// hpa stays ULLONG_MAX when the DPA maps to no region.
u64 dpa, hpa = ULLONG_MAX;
struct cxl_region *cxlr;
/*
* These trace points are annotated with HPA and region
* translations. Take topology mutation locks and lookup
* { HPA, REGION } from { DPA, MEMDEV } in the event record.
*/
// NOTE(review): guard() presumably releases each rwsem when this block is
// left (linux/cleanup.h scope-based cleanup) - confirm before restructuring.
guard(rwsem_read)(&cxl_region_rwsem);
guard(rwsem_read)(&cxl_dpa_rwsem);
// Mask the record's physical address field down to the DPA bits.
dpa = le64_to_cpu(evt->common.phys_addr) & CXL_DPA_MASK;
cxlr = cxl_dpa_to_region(cxlmd, dpa);
if (cxlr)
hpa = cxl_trace_hpa(cxlr, cxlmd, dpa);
if (event_type == CXL_CPER_EVENT_GEN_MEDIA)
// General Media Event Record
trace_cxl_general_media(cxlmd, type, cxlr, hpa,
&evt->gen_media);
else if (event_type == CXL_CPER_EVENT_DRAM)
// DRAM Event Record
trace_cxl_dram(cxlmd, type, cxlr, hpa, &evt->dram);
}
}
EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL);
5. cxl_clear_event_record()
清空事件记录
/*
 * cxl_clear_event_record() - Clear the records returned by a Get Event Records reply.
 * @mds: memdev state used to send mailbox commands; payload_size bounds a batch
 * @log: the event log the records belong to
 * @get_pl: the Get Event Records output whose record handles are to be cleared
 *
 * Copies the handle of every record in @get_pl, in order, into Clear Event
 * Records payloads and sends them in batches of at most max_handles handles,
 * followed by one shorter command for any remainder.
 *
 * Return: 0 on success, -ENOMEM if the payload cannot be allocated, or the
 * error from the first failing cxl_internal_send_cmd() (later batches are
 * then not sent).
 */
static int cxl_clear_event_record(struct cxl_memdev_state *mds,
enum cxl_event_log_type log,
struct cxl_get_event_payload *get_pl)
{
struct cxl_mbox_clear_event_payload *payload;
u16 total = le16_to_cpu(get_pl->record_count);
// Per the original note: #define CXL_CLEAR_EVENT_MAX_HANDLES U8_MAX (0xFF)
u8 max_handles = CXL_CLEAR_EVENT_MAX_HANDLES;
// struct_size() yields the size of the structure plus its trailing array:
// here the payload header followed by max_handles entries of handles[].
size_t pl_size = struct_size(payload, handles, max_handles);
struct cxl_mbox_cmd mbox_cmd;
u16 cnt;
int rc = 0;
int i;
/* Payload size may limit the max handles */
if (pl_size > mds->payload_size) {
max_handles = (mds->payload_size - sizeof(*payload)) /
sizeof(__le16);
pl_size = struct_size(payload, handles, max_handles);
}
payload = kvzalloc(pl_size, GFP_KERNEL);
if (!payload)
return -ENOMEM;
*payload = (struct cxl_mbox_clear_event_payload) {
.event_log = log,
};
// Build the Clear Event Records command:
// CXL_MBOX_OP_CLEAR_EVENT_RECORD = 0x0101
// CXL 3.1r 8.2.9.2.3 Clear Event Records
// size_in starts as a full batch; it is shrunk only for the final remainder.
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_CLEAR_EVENT_RECORD,
.payload_in = payload,
.size_in = pl_size,
};
/*
* Clear Event Records uses u8 for the handle cnt while Get Event
* Record can return up to 0xffff records.
*/
// i counts the handles queued in the current batch; cnt walks all records.
i = 0;
for (cnt = 0; cnt < total; cnt++) {
struct cxl_event_record_raw *raw = &get_pl->records[cnt];
struct cxl_event_generic *gen = &raw->event.generic;
// Record header: CXL 3.1r Table 8-43, Common Event Record Format.
// hdr.handle is the handle unique within the event log; the host passes
// it back to tell the device which record to clear.
// Per the original annotation (CXL spec): events are to be cleared in
// temporal order and the device verifies that the handles are ordered;
// if an older record would be left uncleared, the device returns
// Invalid Handle and clears none of the specified records.
// The handle is copied as-is (still little-endian) into the payload.
payload->handles[i++] = gen->hdr.handle;
dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
le16_to_cpu(payload->handles[i - 1]));
if (i == max_handles) {
// Batch is full: send it, then reset i to start filling the next one.
payload->nr_recs = i;
rc = cxl_internal_send_cmd(mds, &mbox_cmd);
if (rc)
goto free_pl;
i = 0;
}
}
/* Clear what is left if any */
// Final batch: i may be below max_handles, so shrink size_in to match.
if (i) {
payload->nr_recs = i;
mbox_cmd.size_in = struct_size(payload, handles, i);
rc = cxl_internal_send_cmd(mds, &mbox_cmd);
if (rc)
goto free_pl;
}
free_pl:
kvfree(payload);
return rc;
}