最近接触到异构平台(不同芯片架构的处理器之间搭建的硬件平台)的项目,闲暇时间,看看源码,学习一下系统架构方面的知识。
项目中,一台多核 ARM 服务器通过 PCIe 接口,把一些计算量大的模块 offload 到一颗 SoC 芯片上;该 SoC 以 PowerPC core 为中控,另带 FPGA 和 DSP 协处理器。
PowerPC core 作为中控模块,主要负责与 ARM 交互任务信息、解析任务信息,并将任务分发给 FPGA 和 DSP 协处理器,起到调度器的作用。
本文介绍的是arm和powerPC之间通信机制的设计,也就是我们所说的inter-processor communication(IPC)。
具体流程如下:
每个步骤的大概实现代码设计如下:
Step 1: create IPC buffer
a) Define the IPC channel structure. It contains two sub-structures:
one for control, holding the consumer/producer indices, the message size and the ring size,
and one for the buffer descriptors, which hold the pointers to the buffers that store the IPC data.
/**
 * Control metadata for one buffer-descriptor (BD) ring.
 *
 * Both indices carry a flag in their most significant bit; that flag flips
 * each time the ring wraps.
 */
typedef struct ipc_bd_ring_md {
	volatile uint32_t pi;	/**< Producer index, wrap flag in the MSB */
	volatile uint32_t ci;	/**< Consumer index, wrap flag in the MSB */
	uint32_t ring_size;	/**< Ring depth, used to roll over pi/ci */
	uint32_t msg_size;	/**< Size of each buffer */
} __attribute__((packed)) ipc_br_md_t;
/**
 * IPC buffer descriptor: the location of one message as seen from the host
 * and from the modem, plus its length.
 */
typedef struct ipc_buffer_desc {
	union {
		uint64_t host_virt;	/**< Host virtual address of the message */
		struct {
			/*
			 * Two 32-bit words overlaying host_virt.
			 * NOTE(review): which word holds the low half depends
			 * on the byte order of the accessing CPU -- confirm.
			 */
			uint32_t host_virt_l;
			uint32_t host_virt_h;
		};
	};
	uint32_t modem_ptr;	/**< Modem physical address of the message */
	uint32_t len;		/**< Message length */
} __attribute__((packed)) ipc_bd_t;
/**
 * One IPC channel: its id, the ring control metadata and the modem-side
 * buffer descriptors.
 */
typedef struct ipc_channel {
	uint32_t ch_id;			/**< Channel id */
	ipc_br_md_t md;			/**< Metadata for the BD ring */
	ipc_bd_t bd_m[IPC_MAX_DEPTH];	/**< Buffer descriptors on the modem */
} __attribute__((packed)) ipc_ch_t;
b) init the ipc channel
/* Fill in the ring control metadata; both indices start at 0. */
ch->md.ring_size = depth;
ch->md.pi = 0;
ch->md.ci = 0;
ch->md.msg_size = msg_size;
// ipc_mem_pool is a predefined buffer pool in RAM
/* Request depth * msg_size bytes from this queue's pool in one call. */
mem = ipc_malloc(&(ipc_mem_pool[queue_id]),
depth * msg_size, 0, &code);
/* code was passed to ipc_malloc by address; presumably it holds the failure reason. */
if (!mem)
return code;
/*
 * Carve the allocation into depth slots of msg_size bytes each.
 * NOTE(review): bd_m[] is declared with IPC_MAX_DEPTH entries and nothing
 * visible here checks depth against that bound -- confirm the caller does.
 */
for (i = 0; i < depth; i++) {
/* Stored relative to RAM_START_ADDR rather than as an absolute address. */
ch->bd_m[i].modem_ptr = mem - RAM_START_ADDR;
/* NOTE(review): presumably publishes the same value for the host side through hp -- confirm ipc_m2h_32 semantics. */
ipc_m2h_32(ch->bd_m[i].modem_ptr,
&(hp->bd_m_modem_ptr[i]));
mem += msg_size;
}
Step 2: Register IPC info in Heterogeneous interface(hif) region.
a) define hif region.
/* HIF area: placed in the ".hif.start" section on a 64-byte boundary. */
struct gul_hif ipc_hif_area
	__attribute__((section(".hif.start"), aligned(64)));
/* Point the HIF pointer at that area (described in the original note as the RAM start address). */
pGulModPriv->pHif = &ipc_hif_area;
b) Update IPC offset and size in HIF
/** One IPC instance together with the channels that belong to it. */
typedef struct ipc_instance {
	/** Instance id; used by the ipc_init API to init this instance */
	uint32_t instance_id;
	/** Channel descriptors in this instance */
	ipc_ch_t ch_list[IPC_MAX_CHANNEL_COUNT];
} __attribute__((packed)) ipc_instance_t;
/** Top-level IPC metadata: the array of all IPC instances. */
typedef struct ipc_metadata {
	ipc_instance_t instance_list[IPC_MAX_INSTANCE_COUNT];
} __attribute__((packed)) ipc_metadata_t;
/*
 * priv->ipc_md is the start address of the whole IPC region buffer.
 * One IPC region holds several instances, and each instance holds several
 * IPC channels.
 *
 * Publish the region in the HIF as an offset from PEBM_BASE_ADDR together
 * with its total size; the host side adds this offset to its own mapping
 * to find the metadata.
 */
priv->pHif->ipc_regs.ipc_mdata_offset =
	(uint32_t)priv->ipc_md - PEBM_BASE_ADDR;
priv->pHif->ipc_regs.ipc_mdata_size = (uint32_t)sizeof(ipc_metadata_t);
Step 3: Read BAR info in driver and pass it to the ARM applications
a) Read the BAR info in the standard way for a Linux kernel PCI driver
/*
 * Physical address: start of PCI resource i of pdev (BAR2 per the original
 * note), then advanced by GUL_MEM_REGION_PEBM_OFFSET. The region size is
 * fixed to GUL_MMAP_PEBM_SIZE.
 * NOTE(review): the first assignment targets mem_regions[i] while the next
 * two go through gul_dev->mem_regions[i] -- presumably the same array;
 * confirm.
 */
mem_regions[i].phys_addr =
pci_resource_start(pdev, i);//BAR2
gul_dev->mem_regions[i].phys_addr +=
GUL_MEM_REGION_PEBM_OFFSET;
gul_dev->mem_regions[i].size =
GUL_MMAP_PEBM_SIZE; //1 MB
/*
 * Kernel virtual address: map the window only when both the address and
 * the size are non-zero.
 * NOTE(review): phys_addr, size and mem_region are not declared in this
 * excerpt; presumably they refer to the region filled in above -- confirm.
 */
if (phys_addr && size) {
vaddr = ioremap(phys_addr, size);
/* A NULL mapping is treated as failure; leave through the out label. */
if (!vaddr) {
goto out;
}
mem_region->vaddr = vaddr;
}
b) Pass the hif addr from kernel space to userspace using character device
static int gul_ipc_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
case IOCTL_GUL_IPC_GET_PCI_MAP:
ret = copy_from_user(&mem_desc,
(mem_strt_addr_t *)arg,
sizeof(mem_strt_addr_t));
if (ret != 0)
return -EFAULT;
if (gul_ipc_pci_map_is_overlap(ipc_dev,
mem_desc.host_phys, mem_desc.size) < 0)
return -EINVAL;
if (gul_ipc_pci_map_alloc(ipc_dev, mem_desc.host_phys,
&mem_desc.modem_phys, mem_desc.size) < 0)
return -ENOMEM;
ret = copy_to_user((mem_strt_addr_t *)arg, &mem_desc,
sizeof(mem_strt_addr_t));
if (ret != 0)
return -EFAULT;
break;
}
Step 4: The ARM application accesses the IPC region in RAM
/*
 * Ask the driver for the PCI mapping; the result is written into
 * ipc_priv->sys_map.
 * NOTE(review): ret is not checked in the lines shown -- confirm the
 * surrounding code handles an ioctl failure.
 */
ret = ioctl(ipc_priv->dev_ipc, IOCTL_GUL_IPC_GET_PCI_MAP,
&ipc_priv->sys_map);
/*
 * Locate the IPC metadata: the host virtual address of the PEB start plus
 * the ipc_mdata_offset read from the HIF (mhif).
 */
ipc_md = (ipc_metadata_t *)((uint64_t)ipc_priv->peb_start.host_vaddr +
mhif->ipc_regs.ipc_mdata_offset);
/* Select this queue's channel inside the chosen instance. */
ch = &ipc_md->instance_list[instance_id].ch_list[q_priv->q_id];
/*
 * Start init of channel.
 * The queue size goes through rte_cpu_to_be_32() before it is stored.
 * NOTE(review): ch->depth is not a member of the ipc_ch_t shown in Step 1;
 * presumably the full structure has more fields -- confirm.
 * NOTE(review): msg_size is stored without the conversion applied to
 * ring_size -- confirm whether it is already in the expected byte order.
 */
ch->md.ring_size = rte_cpu_to_be_32(q_priv->queue_size);
ch->depth = rte_cpu_to_be_32(q_priv->queue_size);
ch->md.pi = 0;// producer index starts at 0 (original note: "pi++")
ch->md.ci = 0; // consumer index starts at 0 (original note: "ci--")
ch->md.msg_size = msg_size;
上面涉及到一些驱动(pci drivers), 内核(物理/虚拟内存转换,hugepage等)等,我还在探索学习中,慢慢来吧。。。