Information from related header files
On this chip, the DMA engine's outbound writes and reads are fixed to the NWRITE and NREAD packet types.
dmaengine.h
struct dma_chan {
struct dma_device *device;
dma_cookie_t cookie;
dma_cookie_t completed_cookie;
/* sysfs */
int chan_id;
struct dma_chan_dev *dev;
struct list_head device_node;
struct dma_chan_percpu __percpu *local;
int client_count;
int table_count;
/* DMA router */
struct dma_router *router;
void *route_data;
void *private;
};
struct dma_chan_dev {
struct dma_chan *chan;
struct device device;
int dev_id;
atomic_t *idr_ref;
};
struct rio_transfer_io {
__u64 rio_addr; /* Address in target's RIO mem space */
__u64 loc_addr;
__u64 handle;
__u64 offset; /* Offset in buffer */
__u64 length; /* Length in bytes */
__u16 rioid; /* Target destID */
__u16 method; /* Data exchange method, one of rio_exchange enum */
__u32 completion_code; /* Completion code for this transfer */
};
struct rio_mmap {
__u16 rioid;
__u16 pad0[3];
__u64 rio_addr;
__u64 length;
__u64 handle;
__u64 address;
};
struct rio_mport {
struct list_head dbells; /* list of doorbell events */
struct list_head pwrites; /* list of portwrite events */
struct list_head node; /* node in global list of ports */
struct list_head nnode; /* node in net list of ports */
struct rio_net *net; /* RIO net this mport is attached to */
struct mutex lock;
struct resource iores;
struct resource riores[RIO_MAX_MPORT_RESOURCES];
struct rio_msg inb_msg[RIO_MAX_MBOX];
struct rio_msg outb_msg[RIO_MAX_MBOX];
int host_deviceid; /* Host device ID */
struct rio_ops *ops; /* low-level architecture-dependent routines */
unsigned char id; /* port ID, unique among all ports */
unsigned char index; /* port index, unique among all port
interfaces of the same type */
unsigned int sys_size; /* RapidIO common transport system size.
* 0 - Small size. 256 devices.
* 1 - Large size, 65536 devices.
*/
u32 phys_efptr;
u32 phys_rmap;
unsigned char name[RIO_MAX_MPORT_NAME];
struct device dev;
void *priv; /* Master port private data */
#ifdef CONFIG_RAPIDIO_DMA_ENGINE
struct dma_device dma;
#endif
struct rio_scan *nscan;
atomic_t state;
unsigned int pwe_refcnt;
};
Basic driver call-path analysis
The ioctl calls issued by the application land in rio_mport_cdev.c:
/*
* mport_cdev_ioctl() - IOCTLs for character device
*/
static long mport_cdev_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg)
{
int err = -EINVAL;
struct mport_cdev_priv *data = filp->private_data;
struct mport_dev *md = data->md;
if (atomic_read(&md->active) == 0)
return -ENODEV;
switch (cmd) {
.....
case RIO_MPORT_MAINT_READ_LOCAL:
return rio_mport_maint_rd(data, (void __user *)arg, 1);
.....
case RIO_TRANSFER:
return rio_mport_transfer_ioctl(filp, (void __user *)arg);
......
default:
break;
}
return err;
}
MAINT packet reads and writes
The entry point and the low-level accessors:
static int rio_mport_maint_rd(struct mport_cdev_priv *priv, void __user *arg, int local) /* local == 1: access this mport's own config space; local == 0: access the remote device's config space */
static int rab_lcread(struct rio_mport *mport, int index, u32 offset, int len, u32 *data)
{
.......
*data = ioread32(priv->grio_cfg_regs + offset);
return 0;
}
static int rab_lcwrite(struct rio_mport *mport, int index, u32 offset, int len, u32 data)
{
.......
iowrite32(word_reverse(data), priv->grio_cfg_regs + offset);
return 0;
}
static int rab_cread(struct rio_mport *mport, int index, u16 destid,u8 hopcount, u32 offset, int len, u32 *data)
{
.......
map_maint_window(p, destid, 0, 0x100000);
*data = word_reverse(ioread32(p->apio_maint_regs + offset));
....
}
static int rab_cwrite(struct rio_mport *mport, int index, u16 destid,u8 hopcount, u32 offset, int len, u32 data)
{
......
map_maint_window(p, destid, 0, 0x100000);
iowrite32(data, p->apio_maint_regs + offset);
......
}
/*
* We set APIO window 0 as a default maintenance window.
* size: default 1M for RIO maintenance space.
*/
void map_maint_window(struct mobi_rab_dev *p, u16 destid, u32 dst_cfg_base,
u32 size)
{
.......
/* configure APIO address map size */
val32 = size & (~0x3ff);
iowrite32(val32, reg_base + RAB_APIO_AMAP_SIZE_OFF(0));
/* configure APIO source AXI address */
val32 = (u32)p->maint_wind.phys_addr >> 10;//mobi_rab_priv->maint_wind.phys_addr = 0xfae00000;
iowrite32(val32, reg_base + RAB_APIO_AMAP_ABAR_OFF(0));
/* configure APIO destination maintenance RIO address */
val32 = ((dst_cfg_base & 0x3fff) >> 10) | ((0 & 0xff) << 14);
iowrite32(val32, reg_base + RAB_APIO_AMAP_RBAR_OFF(0));
}
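For reference, a hedged user-space sketch of how these paths are reached through the character device. It assumes the upstream <linux/rio_mport_cdev.h> uapi (struct rio_mport_maint_io and the RIO_MPORT_MAINT_* ioctls); the exact layout on this platform may differ.
/* Hedged sketch: one 32-bit maintenance read through the path shown above. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/rio_mport_cdev.h>

static int maint_read32(int mport_fd, int local, uint16_t destid, uint8_t hop,
			uint32_t offset, uint32_t *val)
{
	struct rio_mport_maint_io io = {0};

	io.rioid    = destid;        /* ignored for the local case */
	io.hopcount = hop;
	io.offset   = offset;        /* register offset, e.g. 0x0 = Device Identity CAR */
	io.length   = sizeof(*val);
	io.buffer   = (uintptr_t)val;

	/* local != 0 -> rio_mport_maint_rd(..., local = 1) -> rab_lcread()
	 * local == 0 -> rio_mport_maint_rd(..., local = 0) -> rab_cread() via the maintenance window */
	return ioctl(mport_fd, local ? RIO_MPORT_MAINT_READ_LOCAL
				     : RIO_MPORT_MAINT_READ_REMOTE, &io);
}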
APIO setup
The maintenance window setup and the memory window setup differ mainly in the window type; everything else is the same.
void map_maint_window(struct mobi_rab_dev *p, u16 destid, u32 dst_cfg_base,
u32 size)
.....
/* configure maintenance read and write type */
val32 = RAB_APIO_AMAP_ENABLE | RAB_APIO_AMAP_TYPE_MAINT_RD_WR |
(xamsbs_addr << 5)|((0 & 0x3) << 3);
.....
}
s32 mem_window_set(struct mobi_rab_dev *p, u16 destid,
u64 dst_mem_base, u32 size, u32 win_num)
{
.....
/* enable memory APIO */
.....
/* configure memory read and write type */
xamsbs_addr = 0x3 & (dst_mem_base >> 32);
val32 = RAB_APIO_AMAP_ENABLE | RAB_APIO_MEM_MAP_ENABLE |
(xamsbs_addr << 5)|((0 & 0x3) << 3);
val32 |= destid << 16;
iowrite32(val32, reg_base + RAB_APIO_AMAP_CTRL_OFF(win_num));
.....
.......
return 0;
}
DMA transmit analysis
DMAengine controller documentation
Initialization of the low-level DMA descriptor storage
mobi_rio_register_mport calls rab_register_dma.
rab_register_dma in mobi_dma.c performs the DMA-related initialization.
struct dma_chan {//the standard dmaengine channel structure
struct dma_device *device;
dma_cookie_t cookie;
dma_cookie_t completed_cookie;
/* sysfs */
int chan_id;
struct dma_chan_dev *dev;
struct list_head device_node;
struct dma_chan_percpu __percpu *local;
int client_count;
int table_count;
/* DMA router */
struct dma_router *router;
void *route_data;
void *private;
};
struct rab_dma_chan {//defined in mobi_grio.h: the RapidIO driver's per-channel structure, tying the hardware state to the dmaengine struct dma_chan
u32 id;
/* number of buffer descriptors */
int bd_num;
/* start of DMA descriptors */
void *bd_base;
dma_addr_t bd_phys;
void __iomem *regs;
struct dma_chan dchan;
enum dma_transfer_direction dir;
spinlock_t lock;
bool active;
struct list_head queue;
struct list_head free_list;
struct list_head active_list;
struct rab_tx_desc *tx_desc;
struct rab_tx_desc *active_tx;
};
int rab_register_dma(struct mobi_rab_dev *priv)
{
int i = 0;
int err = 0;
u32 tmp32 = 0;
int nr_channels = 0;
struct rio_mport *mport = &priv->mport;
void __iomem *reg_base = priv->grio_cfg_regs;
/*
* enable RAB WDMA and RDMA
* Default: arrange Prefetch Size = 1 of DMA Descriptors Array
*/
tmp32 = ioread32(reg_base + RAB_CTRL_OFF);
tmp32 |= (RAB_CTRL_WDMA_ENABLE | RAB_CTRL_RDMA_ENABLE | (RAB_CTRL_DMA_1_PREFETCH << 8));
iowrite32(tmp32, reg_base + RAB_CTRL_OFF);
INIT_LIST_HEAD(&mport->dma.channels);//initialize the DMA device's channel list
for (i = 0; i < RAB_ABSTRACT_DMA_NUM; i++) {//the RapidIO controller has 8 DMA channels; register them one by one
struct rab_dma_chan *rab_dchan = &priv->dma[i];//each physical DMA channel (CHn in figure 1) is abstracted by a struct dma_chan (DCn in figure 1); physical channels map one-to-one onto the channels the controller provides
rab_dchan->dchan.device = &mport->dma;//point the channel back at its parent dma_device
rab_dchan->dchan.cookie = 1;//initialize the channel cookie: an integer used to track a transfer; the controller driver keeps an incrementing number internally and assigns it to each transfer descriptor when the client obtains one (see the description in 3.3), then increments it
rab_dchan->dchan.chan_id = i;//dma_chan channel id
rab_dchan->id = i;//rab_dma_chan channel id
rab_dchan->active = false;//rab_dma_chan active flag
rab_dchan->regs = priv->grio_cfg_regs;//register base for this rab_dma_chan
/* init a lock to sychronize the use of associated channel */
spin_lock_init(&rab_dchan->lock);
/* point current active transfer descriptor on a channel */
rab_dchan->active_tx = NULL;
/* init a queue per channel for transfer */
INIT_LIST_HEAD(&rab_dchan->queue);//the queue list holds pending desc_nodes; rab_tx_submit adds entries to it
INIT_LIST_HEAD(&rab_dchan->free_list);//free_list holds desc_nodes to be reclaimed: rab_alloc_chan_resources puts every descriptor on it at init time, the interrupt handler adds completed desc_nodes back, and they are reused on the next submit
/* Add a channel to DMA device */
list_add_tail(&rab_dchan->dchan.device_node,&mport->dma.channels);//add this channel's device_node to the DMA device's channel list
nr_channels++;
}
mport->dma.chancnt = nr_channels;
//for this initialization sequence see the DMAengine controller documentation link
dma_cap_zero(mport->dma.cap_mask);//clear the capability mask
dma_cap_set(DMA_PRIVATE, mport->dma.cap_mask);//set DMA_PRIVATE: the device only supports slave transfers and is not available for async_tx transfers
dma_cap_set(DMA_SLAVE, mport->dma.cap_mask);//set the DMA_SLAVE capability
mport->dma.dev = priv->dev;//should hold the pointer to the struct device associated with the current driver instance
mport->dma.device_alloc_chan_resources = rab_alloc_chan_resources;//allocate per-channel resources when a client takes the channel
mport->dma.device_free_chan_resources = rab_free_chan_resources;//free per-channel resources when the client releases the channel
mport->dma.device_tx_status = rab_tx_status;//report transfer status (residue bytes and so on)
mport->dma.device_prep_slave_sg = rab_prep_rio_sg;//backend for dmaengine_prep_slave_sg
mport->dma.device_issue_pending = rab_issue_pending;//backend for dma_async_issue_pending: kicks off transfers queued on the channel
mport->dma.device_terminate_all = rab_terminate_all;//abort all ongoing transfers on the channel
err = dma_async_device_register(&mport->dma);//register the populated dma_device with the kernel
if (err)
dev_err(priv->dev, "Failed to register DMA device");
return err;
}
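Once the dma_device above is registered, a client obtains one of these channels through the standard dmaengine request API (in the mport cdev path this happens in get_dma_channel(), presumably via rio_request_mport_dma()). A minimal sketch with a hypothetical filter function, relying only on the capabilities set above:
/* Hedged sketch of a dmaengine client picking up one of the channels registered
 * in rab_register_dma(). my_rio_chan_filter is hypothetical; the real cdev path
 * uses its own filter inside the RapidIO core. */
#include <linux/dmaengine.h>
#include <linux/rio.h>

static bool my_rio_chan_filter(struct dma_chan *chan, void *arg)
{
	struct rio_mport *mport = arg;

	/* accept only channels exported by this mport's dma_device */
	return chan->device == &mport->dma;
}

static struct dma_chan *my_get_rio_chan(struct rio_mport *mport)
{
	dma_cap_mask_t mask;

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);   /* matches the capability set in rab_register_dma() */
	return dma_request_channel(mask, my_rio_chan_filter, mport);
}
When a channel is handed out this way, the dmaengine core invokes the device_alloc_chan_resources hook, i.e. rab_alloc_chan_resources below.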
In mobi_dma.c:
static int rab_alloc_chan_resources(struct dma_chan *dchan)
{
struct rab_dma_chan *rab_dchan = to_rab_chan(dchan);
struct rab_tx_desc *desc = NULL;
int i = 0;
dev_dbg(&dchan->dev->device, "DMAC%d", rab_dchan->id);
/* Initialize DMA channel */
if (rab_dma_init(rab_dchan, dma_queue_sz)) {//set up the low-level rab_dma_chan state (allocates the IP's hardware buffer descriptors)
dev_err(&dchan->dev->device, "Unable to initialize DMAC%d", rab_dchan->id);
return -ENODEV;
}
/* Allocate queue of transaction descriptors */
desc = kcalloc(dma_queue_sz, sizeof(struct rab_tx_desc),GFP_ATOMIC);//allocate storage for the rab_tx_desc transfer descriptors (the mid-layer descriptor structure)
if (!desc) {
rab_dma_ch_free(rab_dchan);//release the channel's hardware descriptors on failure
return -ENOMEM;
}
rab_dchan->tx_desc = desc;//save the descriptor array pointer in the channel's tx_desc field
for (i = 0; i < dma_queue_sz; i++) {
dma_async_tx_descriptor_init(&desc[i].txd, dchan);//bind each dma_async_tx_descriptor to this channel
desc[i].txd.tx_submit = rab_tx_submit;//set the submit callback
desc[i].txd.flags = DMA_CTRL_ACK;//set the descriptor flags
list_add(&desc[i].desc_node, &rab_dchan->free_list);//at init time every descriptor goes on the free list
}
dma_cookie_init(dchan);//initialize the channel cookie
rab_dchan->active = true;//mark the channel as active
rab_dma_intr_enable(rab_dchan, 1);//enable the DMA interrupt
return dma_queue_sz;
}
static int rab_dma_init(struct rab_dma_chan *dma_chan, int bd_num)
{
struct rab_dma_desc *desc_ptr = NULL;
struct device *dev = dma_chan->dchan.device->dev;
dma_addr_t bd_phys = 0;
/*
* Allocate space for DMA descriptors.
* desc_ptr: kernel virtual address the CPU uses to fill descriptors.
* bd_phys: bus/physical address programmed into the IP's registers.
*/
desc_ptr = dma_zalloc_coherent(dev,
bd_num * sizeof(struct rab_dma_desc),
&bd_phys, GFP_KERNEL);
if (!desc_ptr)
return -ENOMEM;
dma_chan->bd_num = bd_num;
dma_chan->bd_phys = bd_phys;
dma_chan->bd_base = desc_ptr;
........
return 0;
}
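struct rab_dma_desc itself is not quoted in this document; judging from the fields written in rab_desc_fill_init below and the 16-byte stride used in rab_submit_sg (bd_phys + (idx + 1) * 16), it is presumably four 32-bit words. A hedged reconstruction, not the authoritative definition from the driver headers:
#include <linux/types.h>

/* Hypothetical layout inferred from rab_desc_fill_init(); addresses are stored
 * as word addresses (shifted right by 2), and a next address of 0 ends the chain. */
struct rab_dma_desc {
	u32 rab_dma_desc_ctl;       /* VALID | NEXT_VALID | (length_in_words << 5) */
	u32 rab_dma_desc_src_addr;  /* source address >> 2 */
	u32 rab_dma_desc_dest_addr; /* destination address >> 2 */
	u32 rab_dma_desc_next_addr; /* bus address of the next descriptor >> 2, or 0 */
};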
Allocating and releasing the APIO space needed by DMA
Before initialization, the application must call the RIO_MAP_OUTBOUND ioctl to set up the corresponding outbound window, and RIO_UNMAP_OUTBOUND to release it.
For why this is necessary, see the "AXI address to RapidIO address translation (AXI PIO Mode)" section of my earlier notes 《rapidio学习使用过程摘抄》.
struct rio_mmap amap = {0};
/* initial apio */
amap.rioid = dest_id;
amap.rio_addr = target_addr;
amap.length = GRIO_DMA_TEST_LENGTH;
res = ioctl(mport_fd, RIO_MAP_OUTBOUND, &amap);
.........
res = ioctl(mport_fd, RIO_UNMAP_OUTBOUND, &amap.handle);
Setting up the DMA outbound mapping in the driver
In rio_mport_cdev.c:
static int rio_mport_obw_map(struct file *filp, void __user *arg)
{
struct mport_cdev_priv *priv = filp->private_data;
struct mport_dev *data = priv->md;
struct rio_mmap map;
dma_addr_t paddr;
int ret;
if (unlikely(copy_from_user(&map, arg, sizeof(map))))
return -EFAULT;
.....
ret = rio_mport_get_outbound_mapping(data, filp, map.rioid, map.rio_addr, map.length, &paddr);
if (ret < 0) {
......
return ret;
}
map.handle = paddr;
if (unlikely(copy_to_user(arg, &map, sizeof(map))))
return -EFAULT;
return 0;
}
In rio_mport_cdev.c:
static int rio_mport_get_outbound_mapping(struct mport_dev *md, struct file *filp,
u16 rioid, u64 raddr, u32 size,
dma_addr_t *paddr)
{
struct rio_mport_mapping *map;
int err = -ENOMEM;
mutex_lock(&md->buf_mutex);
list_for_each_entry(map, &md->mappings, node) {
if (map->dir != MAP_OUTBOUND)
continue;
if (rioid == map->rioid &&
raddr == map->rio_addr && size == map->size) {
*paddr = map->phys_addr;
err = 0;
break;
} else if (rioid == map->rioid &&
raddr < (map->rio_addr + map->size - 1) &&
(raddr + size) > map->rio_addr) {
err = -EBUSY;
break;
}
}
/* If not found, create new */
if (err == -ENOMEM)
err = rio_mport_create_outbound_mapping(md, filp, rioid, raddr,
size, paddr);
mutex_unlock(&md->buf_mutex);
return err;
}
Note the list_add_tail(&map->node, &md->mappings); call in rio_mport_cdev.c: it adds the rio_map_outb_region mapping information, including paddr (the physical source address), to the mappings list. This ties into the if (xfer->loc_addr) check in rio_dma_transfer: when the application does not supply a user-space virtual address (loc_addr == 0), the transfer can use the previously mapped physical address directly.
/*
* Inbound/outbound memory mapping functions
*/
static int rio_mport_create_outbound_mapping(struct mport_dev *md, struct file *filp,
u16 rioid, u64 raddr, u32 size,
dma_addr_t *paddr)
{
struct rio_mport *mport = md->mport;
struct rio_mport_mapping *map;
int ret;
rmcd_debug(OBW, "did=%d ra=0x%llx sz=0x%x", rioid, raddr, size);
map = kzalloc(sizeof(*map), GFP_KERNEL);
if (map == NULL)
return -ENOMEM;
ret = rio_map_outb_region(mport, rioid, raddr, size, 0, paddr);
if (ret < 0)
goto err_map_outb;
map->dir = MAP_OUTBOUND;
map->rioid = rioid;
map->rio_addr = raddr;
map->size = size;
map->phys_addr = *paddr;//the lower layer returns this from p->ob_win[i].phys_addr
map->filp = filp;
map->md = md;
kref_init(&map->ref);
list_add_tail(&map->node, &md->mappings);
return 0;
err_map_outb:
kfree(map);
return ret;
}
In rio.c, the registered map_outb callback is invoked under the rio_mmap_lock spinlock to serialize concurrent calls:
int rio_map_outb_region(struct rio_mport *mport, u16 destid, u64 rbase,
u32 size, u32 rflags, dma_addr_t *local)
{
int rc;
unsigned long flags;
if (!mport->ops->map_outb)
return -ENODEV;
spin_lock_irqsave(&rio_mmap_lock, flags);
rc = mport->ops->map_outb(mport, destid, rbase, size,
rflags, local);
spin_unlock_irqrestore(&rio_mmap_lock, flags);
return rc;
}
ops->map_outb is assigned in mobi_rio_register_mport:
ops->map_outb = rab_map_outb_mem;
In mobi_grio.c, rab_map_outb_mem enables APIO and calls mem_window_set to configure an APIO outbound window:
static int rab_map_outb_mem(struct rio_mport *mport, u16 destid,
u64 rstart, u32 size, u32 flags, dma_addr_t *laddr)
{
struct mobi_rab_dev *p = mport->priv;
void __iomem *reg_base = p->grio_cfg_regs;
u32 i = 0;
u32 tmp32 = 0;
s32 ret = 0;
/* enable RAB APIO */
tmp32 = ioread32(reg_base + RAB_CTRL_OFF);
tmp32 |= RAB_CTRL_APIO_ENABLE;
iowrite32(tmp32, reg_base + RAB_CTRL_OFF);
/* enable APIO engine */
tmp32 = RAB_APIO_ENABLE;
iowrite32(tmp32, reg_base + RAB_APIO_CTRL_OFF(0));
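/* APIO window 0 is reserved as the default maintenance window (see map_maint_window), so memory windows are searched starting from index 1 */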
for (i = 1; i < RAB_OBWIN_NUM; i++) {
if (!p->ob_win[i].active) {
ret = mem_window_set(p, destid, rstart, size, i);
break;
}
}
if ((i == RAB_OBWIN_NUM) || ret)
return -EBUSY;
*laddr = (dma_addr_t)p->ob_win[i].phys_addr;
return 0;
}
mem_window_set in mobi_grio.c programs the APIO address mapping:
s32 mem_window_set(struct mobi_rab_dev *p, u16 destid,
u64 dst_mem_base, u32 size, u32 win_num)
{
void __iomem *reg_base = p->grio_cfg_regs;
u32 val32 = 0;
u32 xamsbs_addr = 0;
p->ob_win[win_num].base = dma_alloc_coherent(p->dev, size,&p->ob_win[win_num].phys_addr, GFP_KERNEL);
if (!p->ob_win[win_num].base)
return -ENOMEM;
p->ob_win[win_num].destid = destid;
p->ob_win[win_num].ismem = true;
p->ob_win[win_num].rstart = dst_mem_base;
p->ob_win[win_num].size = size;
p->ob_win[win_num].active = true;
/* enable memory APIO */
val32 = ioread32(reg_base + RAB_APIO_CTRL_OFF(win_num));
val32 |= RAB_APIO_MEM_MAP_ENABLE;
iowrite32(val32, reg_base + RAB_APIO_CTRL_OFF(win_num));
/* configure memory read and write type */
xamsbs_addr = 0x3 & (dst_mem_base >> 32);
val32 = RAB_APIO_AMAP_ENABLE | RAB_APIO_MEM_MAP_ENABLE |
(xamsbs_addr << 5)|((0 & 0x3) << 3);
val32 |= destid << 16;
iowrite32(val32, reg_base + RAB_APIO_AMAP_CTRL_OFF(win_num));
/* configure APIO address map size */
val32 = size & (~0x3ff);
iowrite32(val32, reg_base + RAB_APIO_AMAP_SIZE_OFF(win_num));
/* configure APIO source AXI address */
val32 = (u32)p->ob_win[win_num].base >> 10;
iowrite32(val32, reg_base + RAB_APIO_AMAP_ABAR_OFF(win_num));
/* configure APIO destination RIO address */
val32 = (dst_mem_base >> 10) | ((0 & 0xff) << 14);
iowrite32(val32, reg_base + RAB_APIO_AMAP_RBAR_OFF(win_num));
.....
return 0;
}
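To make the register math above concrete, a worked example with made-up numbers (the destid, remote base and the coherent buffer's bus address are all illustrative):
/* Worked example of the window programming in mem_window_set(), illustrative values only:
 * destid = 5, remote RIO base = 0x80000000, size = 1 MB,
 * hypothetical local bus address of the coherent buffer = 0x3f000000. */
u32 destid       = 5;
u64 dst_mem_base = 0x80000000ULL;
u32 size         = 0x100000;                      /* 1 MB */
u64 axi_base     = 0x3f000000ULL;                 /* hypothetical ob_win phys_addr */

u32 xamsbs    = 0x3 & (dst_mem_base >> 32);       /* = 0: RIO address bits [33:32] */
u32 ctrl      = RAB_APIO_AMAP_ENABLE | RAB_APIO_MEM_MAP_ENABLE |
		(xamsbs << 5) | (destid << 16);   /* enable + memory window type + target destID */
u32 amap_size = size & ~0x3ff;                    /* = 0x100000, 1 KB granularity */
u32 abar      = (u32)(axi_base >> 10);            /* = 0xfc000: local AXI base / 1 KB */
u32 rbar      = (u32)(dst_mem_base >> 10);        /* = 0x200000: remote RIO base / 1 KB */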
DMA transmit description
Linux application call sequence
Below is a piece of DMA test code that uses the RIO_TRANSFER ioctl to send data via DMA.
struct rio_transfer_io w_xfer = {0};
struct rio_transaction w_tran = {0};
w_xfer.rioid = dest_id;
w_xfer.rio_addr = target_addr + dst_off;
w_xfer.loc_addr = (__u64)(uintptr_t)buf;
w_xfer.length = ret_in;
w_xfer.handle = 0;
w_xfer.offset = 0;
w_xfer.method = RIO_EXCHANGE_NWRITE_R;
w_tran.transfer_mode = RIO_TRANSFER_MODE_TRANSFER;
w_tran.sync = RIO_TRANSFER_SYNC;
w_tran.dir = RIO_TRANSFER_DIR_WRITE;
/* Must be 1 as required by the RapidIO subsystem */
w_tran.count = 1;
w_tran.block = (__u64)(uintptr_t)&w_xfer;
res = ioctl(mport_fd, RIO_TRANSFER, &w_tran);
if (res) {
printf("DMA write fail %d!\n", res);
return;
}
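The read direction looks the same except for dir and method; a hedged sketch reusing the same assumed variables (buf, dest_id, target_addr, dst_off, ret_in, mport_fd):
/* Hedged sketch of the matching DMA read; method is only consulted for writes
 * in prep_dma_xfer(), and reads on this chip always go out as NREAD. */
struct rio_transfer_io r_xfer = {0};
struct rio_transaction r_tran = {0};
r_xfer.rioid = dest_id;
r_xfer.rio_addr = target_addr + dst_off;
r_xfer.loc_addr = (__u64)(uintptr_t)buf; /* user buffer that receives the data */
r_xfer.length = ret_in;
r_xfer.method = RIO_EXCHANGE_DEFAULT;
r_tran.transfer_mode = RIO_TRANSFER_MODE_TRANSFER;
r_tran.sync = RIO_TRANSFER_SYNC;
r_tran.dir = RIO_TRANSFER_DIR_READ; /* becomes DMA_FROM_DEVICE in the driver */
r_tran.count = 1;
r_tran.block = (__u64)(uintptr_t)&r_xfer;
res = ioctl(mport_fd, RIO_TRANSFER, &r_tran);
if (res) {
printf("DMA read fail %d!\n", res);
return;
}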
Driver side: rio_mport_transfer_ioctl implements the ioctl
In rio_mport_cdev.c, rio_mport_transfer_ioctl mainly calls rio_dma_transfer to perform the transfer:
static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg)
{
struct mport_cdev_priv *priv = filp->private_data;
struct rio_transaction transaction;
struct rio_transfer_io *transfer;
enum dma_data_direction dir;
int i, ret = 0;
if (unlikely(copy_from_user(&transaction, arg, sizeof(transaction))))
return -EFAULT;
if (transaction.count != 1) /* only single transfer for now */
return -EINVAL;
if ((transaction.transfer_mode &
priv->md->properties.transfer_mode) == 0)
return -ENODEV;
transfer = vmalloc(array_size(sizeof(*transfer), transaction.count));
if (!transfer)
return -ENOMEM;
if (unlikely(copy_from_user(transfer,
(void __user *)(uintptr_t)transaction.block,
transaction.count * sizeof(*transfer)))) {
ret = -EFAULT;
goto out_free;
}
dir = (transaction.dir == RIO_TRANSFER_DIR_READ) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE;
for (i = 0; i < transaction.count && ret == 0; i++)
ret = rio_dma_transfer(filp, transaction.transfer_mode,
transaction.sync, dir, &transfer[i]);
if (unlikely(copy_to_user((void __user *)(uintptr_t)transaction.block, transfer, transaction.count * sizeof(*transfer))))
ret = -EFAULT;
out_free:
vfree(transfer);
return ret;
}
sg_alloc and scatter/gather are DMA-related; see these links for background:
Link: introduction to the Linux kernel scatterlist API
Dynamic DMA mapping Guide
To understand the DMA part of this driver, be sure to read this link: introduction to scatter/gather DMA
Link: DMA introduction on other platforms
Two concepts:
1. Consistent DMA mappings
Consistent DMA mappings have two characteristics:
(1) The DMA buffer is used persistently (not one-shot), so a consistent mapping is created at initialization time and unmapped at shutdown.
(2) The CPU and the DMA controller can access the DMA buffer concurrently without worrying about caches: no software cache maintenance is needed, and each side sees the other's updates. The "consistent" here really means coherent, i.e. cache coherent.
2. Streaming DMA mappings
A streaming DMA mapping is one-shot: it is normally created only when a DMA transfer is about to happen and unmapped as soon as the transfer completes (unless the dma_sync_* interfaces are used), and the hardware can optimize for sequential access. "Streaming" here can be read as asynchronous, or as lying outside the coherent-memory domain.
The reason scatterlists exist is that a running system fragments memory into chunks of, say, 4 KB, 100 KB or 1 MB (much like disk fragmentation), while network and disk I/O often need to move large blocks, especially with DMA, where the physical addresses must be contiguous. If 1 MB is needed, it might be available as one contiguous 1 MB block, or as ten 10 KB chunks plus nine 100 KB chunks; some or all of those 19 chunks may happen to be contiguous, but in general they are not. A scatterlist describes exactly this situation; think of it as a list of memory chunks. sg_alloc_table allocates such a list (see the "DMA introduction on other platforms" link for details).
In rio_mport_cdev.c, rio_dma_transfer first breaks the transfer up. If the data comes from a user-space buffer, the buffer is decomposed into individual pages for transmission.
If the user virtual pages are already backed by physical pages (the xfer->loc_addr != 0 path), they can be looked up with find_vma_intersection, follow_pfn and pfn_to_page; if they are not yet mapped, get_user_pages_fast is called. Either way this yields the physical pages backing the user virtual address range (5 pages in the linked example), whose struct page pointers are stored in the pages array. Passing that array to sg_alloc_table_from_pages produces an sg_table (the four entries on the right of figure 2.2 in the linked article together form the sg_table).
(See the scatter/gather DMA introduction link for this part.)
If the application instead supplies a physical address (the xfer->loc_addr == 0 path, with handle set), then:
dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction)
Return value: the number of mapped physical regions (this may be smaller than the input count, because scatter/gather entries that are physically or virtually adjacent can be merged into a single region). This is analogous to the page case above, except the entries are built from a physical address rather than from page structures.
Things to note
1. The driver defines dma_queue_sz, and the DMA engine can move at most 1 MB per descriptor. A large packet has to be split into many small DMA transfers chained together as a descriptor list; dma_queue_sz is the maximum number of entries in that list. When the address comes from application memory, this value needs to be at least size/4096, i.e. one descriptor per page in the worst case.
/* number of DMA descriptors */
static unsigned int dma_queue_sz = 4;
2. Because sg_alloc_table_from_pages merges contiguous pages into single sg entries, and this DMA engine supports at most 1 MB per copy, entries larger than 1 MB need special handling: the contiguous region is split back into multiple descriptors (see rab_desc_fill_init below and the sketch that follows).
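A rough sanity check of the two notes above, with illustrative values only (how much actually merges depends on how the user pages happen to be laid out):
/* Rough arithmetic behind notes 1 and 2 (illustrative values). */
#define RAB_DMA_MAX_TRANS_BYTES 0x100000 /* 1 MB per hardware descriptor (assumed value) */
size_t length = 3 * 1024 * 1024 + 4096; /* example: a page-aligned user buffer of 3 MB + 4 KB */
/* Worst case (note 1): nothing merges, one sg entry and one descriptor per 4 KB page. */
size_t nr_pages = (length + 4095) / 4096; /* = 769, so dma_queue_sz must be at least 769 here */
/* Best case (note 2): everything merges into one contiguous sg entry, which
 * rab_desc_fill_init still has to split into ceil(length / 1 MB) descriptors. */
size_t nr_split = (length + RAB_DMA_MAX_TRANS_BYTES - 1) / RAB_DMA_MAX_TRANS_BYTES; /* = 4 */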
Link: introduction to kref
static int rio_dma_transfer(struct file *filp, u32 transfer_mode,
enum rio_transfer_sync sync, enum dma_data_direction dir,
struct rio_transfer_io *xfer)//xfer carries the transfer parameters passed straight in from user space
{
struct mport_cdev_priv *priv = filp->private_data;
unsigned long nr_pages = 0;
struct page **page_list = NULL;
struct mport_dma_req *req;
struct mport_dev *md = priv->md;//set up in mport_cdev_add
struct dma_chan *chan;
int i, ret;
int nents;
if (xfer->length == 0)
return -EINVAL;
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req)
return -ENOMEM;
ret = get_dma_channel(priv);
if (ret) {
kfree(req);
return ret;
}
chan = priv->dmach;
kref_init(&req->refcount);
init_completion(&req->req_comp);
req->dir = dir;
req->filp = filp;
req->priv = priv;
req->dmach = chan;
req->sync = sync;
/*
* If parameter loc_addr != NULL, we are transferring data from/to
* data buffer allocated in user-space: lock in memory user-space
* buffer pages and build an SG table for DMA transfer request
*
* Otherwise (loc_addr == NULL) contiguous kernel-space buffer is
* used for DMA data transfers: build single entry SG table using
* offset within the internal buffer specified by handle parameter.
*/
if (xfer->loc_addr) {//our test program sets loc_addr to the address of a user-space buffer, i.e. a user virtual address
unsigned int offset;
long pinned;
offset = lower_32_bits(offset_in_page(xfer->loc_addr));
nr_pages = PAGE_ALIGN(xfer->length + offset) >> PAGE_SHIFT;
page_list = kmalloc_array(nr_pages, sizeof(*page_list), GFP_KERNEL);
if (page_list == NULL) {
ret = -ENOMEM;
goto err_req;
}
pinned = get_user_pages_fast((unsigned long)xfer->loc_addr & PAGE_MASK, nr_pages, dir == DMA_FROM_DEVICE, page_list);//pin the user pages backing xfer->loc_addr and store their struct page pointers in the page_list array
if (pinned != nr_pages) {
if (pinned < 0) {
....
nr_pages = 0;
} else
....
ret = -EFAULT;
goto err_pg;
}
ret = sg_alloc_table_from_pages(&req->sgt, page_list, nr_pages, offset, xfer->length, GFP_KERNEL);
if (ret) {
....
goto err_pg;
}
req->page_list = page_list;
req->nr_pages = nr_pages;
} else {//loc_addr is not set, so the transfer uses a kernel buffer identified by handle (obtained earlier via the RIO_ALLOC_DMA or RIO_MAP_OUTBOUND ioctl)
dma_addr_t baddr;
struct rio_mport_mapping *map;
baddr = (dma_addr_t)xfer->handle;//the handle is the DMA address returned by rio_mport_obw_map via mem_window_set: *laddr = (dma_addr_t)p->ob_win[i].phys_addr
mutex_lock(&md->buf_mutex);
list_for_each_entry(map, &md->mappings, node) {//md->mappings is initialized in mport_cdev_add
if (baddr >= map->phys_addr &&
baddr < (map->phys_addr + map->size)) {//if baddr falls inside a mapped region, record that mapping: req->map = map
kref_get(&map->ref);
req->map = map;
break;
}
}
mutex_unlock(&md->buf_mutex);
if (req->map == NULL) {
ret = -ENOMEM;
goto err_req;
}
if (xfer->length + xfer->offset > map->size) {
ret = -EINVAL;
goto err_req;
}
ret = sg_alloc_table(&req->sgt, 1, GFP_KERNEL);//unlike the page-based path above (sg_alloc_table_from_pages), this allocates a single-entry sg table
if (unlikely(ret)) {
rmcd_error("sg_alloc_table failed for internal buf");
goto err_req;
}
sg_set_buf(req->sgt.sgl, map->virt_addr + (baddr - map->phys_addr) +xfer->offset, xfer->length);//put the kernel virtual address of the buffer into the sg entry; it is offset by (baddr - map->phys_addr) so the data stays inside the region covered by the APIO mapping, plus the extra xfer->offset
}
nents = dma_map_sg(chan->device->dev, req->sgt.sgl, req->sgt.nents, dir);
if (nents == 0) {
rmcd_error("Failed to map SG list");
ret = -EFAULT;
goto err_pg;
}
ret = do_dma_request(req, xfer, sync, nents);
if (ret >= 0) {
if (sync == RIO_TRANSFER_ASYNC)
return ret; /* return ASYNC cookie */
} else {
....
}
err_pg:
if (!req->page_list) {
for (i = 0; i < nr_pages; i++)
put_page(page_list[i]);
kfree(page_list);
}
err_req:
kref_put(&req->refcount, dma_req_free);
return ret;
}
In rio_mport_cdev.c:
/*
* DMA transfer functions
*/
static int do_dma_request(struct mport_dma_req *req, struct rio_transfer_io *xfer, enum rio_transfer_sync sync, int nents)
{
struct mport_cdev_priv *priv;
struct sg_table *sgt;
struct dma_chan *chan;
struct dma_async_tx_descriptor *tx;
dma_cookie_t cookie;
unsigned long tmo = msecs_to_jiffies(dma_timeout);
enum dma_transfer_direction dir;
long wret;
int ret = 0;
priv = req->priv;
sgt = &req->sgt;
chan = priv->dmach;
dir = (req->dir == DMA_FROM_DEVICE) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV;
....
/* Initialize DMA transaction request */
tx = prep_dma_xfer(chan, xfer, sgt, nents, dir, DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
if (!tx) {
....
ret = -EIO;
goto err_out;
} else if (IS_ERR(tx)) {
ret = PTR_ERR(tx);
....
xfer->rio_addr, xfer->length);
goto err_out;
}
tx->callback = dma_xfer_callback;
tx->callback_param = req;
req->status = DMA_IN_PROGRESS;
kref_get(&req->refcount);
cookie = dmaengine_submit(tx);
req->cookie = cookie;
...
if (dma_submit_error(cookie)) {
....
kref_put(&req->refcount, dma_req_free);
ret = -EIO;
goto err_out;
}
dma_async_issue_pending(chan);
if (sync == RIO_TRANSFER_ASYNC) {
spin_lock(&priv->req_lock);
list_add_tail(&req->node, &priv->async_list);
spin_unlock(&priv->req_lock);
return cookie;
} else if (sync == RIO_TRANSFER_FAF)
return 0;
wret = wait_for_completion_interruptible_timeout(&req->req_comp, tmo);
if (wret == 0) {
......
return -ETIMEDOUT;
} else if (wret == -ERESTARTSYS) {
....
current->comm, task_pid_nr(current),
(dir == DMA_DEV_TO_MEM)?"READ":"WRITE", cookie);
return -EINTR;
}
if (req->status != DMA_COMPLETE) {
......
ret = -EIO;
}
err_out:
return ret;
}
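do_dma_request installs dma_xfer_callback before submitting, but that callback is not quoted in this document. In the mainline rio_mport_cdev.c it is roughly the following (a sketch from the upstream source, so details may differ in this driver): it records the final status and completes req_comp, which is what the wait_for_completion_interruptible_timeout call above blocks on.
/* Sketch of the completion callback wired up in do_dma_request(); based on
 * mainline rio_mport_cdev.c, the local version may differ. */
static void dma_xfer_callback(void *param)
{
	struct mport_dma_req *req = (struct mport_dma_req *)param;
	struct mport_cdev_priv *priv = req->priv;

	/* ask the dmaengine core for the final status of this cookie */
	req->status = dma_async_is_tx_complete(priv->dmach, req->cookie,
					       NULL, NULL);
	complete(&req->req_comp);   /* wakes the synchronous waiter in do_dma_request() */
	kref_put(&req->refcount, dma_req_free);
}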
prep_dma_xfer: transfer preparation
In rio_mport_cdev.c:
/*
* prep_dma_xfer() - Configure and send request to DMAengine to prepare DMA
* transfer object.
* Returns pointer to DMA transaction descriptor allocated by DMA driver on
* success or ERR_PTR (and/or NULL) if failed. Caller must check returned
* non-NULL pointer using IS_ERR macro.
*/
static struct dma_async_tx_descriptor
*prep_dma_xfer(struct dma_chan *chan, struct rio_transfer_io *transfer,
struct sg_table *sgt, int nents, enum dma_transfer_direction dir,
enum dma_ctrl_flags flags)
{
struct rio_dma_data tx_data;
tx_data.sg = sgt->sgl;
tx_data.sg_len = nents;
tx_data.rio_addr_u = 0;
tx_data.rio_addr = transfer->rio_addr;
if (dir == DMA_MEM_TO_DEV) {
switch (transfer->method) {
case RIO_EXCHANGE_NWRITE:
tx_data.wr_type = RDW_ALL_NWRITE;
break;
case RIO_EXCHANGE_NWRITE_R_ALL:
tx_data.wr_type = RDW_ALL_NWRITE_R;
break;
case RIO_EXCHANGE_NWRITE_R:
tx_data.wr_type = RDW_LAST_NWRITE_R;
break;
case RIO_EXCHANGE_DEFAULT:
tx_data.wr_type = RDW_DEFAULT;
break;
default:
return ERR_PTR(-EINVAL);
}
}
return rio_dma_prep_xfer(chan, transfer->rioid, &tx_data, dir, flags);
}
In rio.c:
/**
* rio_dma_prep_xfer - RapidIO specific wrapper
* for device_prep_slave_sg callback defined by DMAENGINE.
* @dchan: DMA channel to configure
* @destid: target RapidIO device destination ID
* @data: RIO specific data descriptor
* @direction: DMA data transfer direction (TO or FROM the device)
* @flags: dmaengine defined flags
*
* Initializes RapidIO capable DMA channel for the specified data transfer.
* Uses DMA channel private extension to pass information related to remote
* target RIO device.
*
* Returns: pointer to DMA transaction descriptor if successful,
* error-valued pointer or NULL if failed.
*/
struct dma_async_tx_descriptor *rio_dma_prep_xfer(struct dma_chan *dchan,
u16 destid, struct rio_dma_data *data,
enum dma_transfer_direction direction, unsigned long flags)
{
struct rio_dma_ext rio_ext;
.......
rio_ext.destid = destid;
rio_ext.rio_addr_u = data->rio_addr_u;
rio_ext.rio_addr = data->rio_addr;
rio_ext.wr_type = data->wr_type;
return dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len,
direction, flags, &rio_ext);
}
This was assigned when the DMA device was registered, so the device_prep_slave_sg call above actually invokes rab_prep_rio_sg:
mport->dma.device_prep_slave_sg = rab_prep_rio_sg;
In dmaengine.h, dmaengine_prep_rio_sg dispatches through device_prep_slave_sg, which here is rab_prep_rio_sg in mobi_dma.c:
#ifdef CONFIG_RAPIDIO_DMA_ENGINE
struct rio_dma_ext;
static inline struct dma_async_tx_descriptor *dmaengine_prep_rio_sg(
struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
enum dma_transfer_direction dir, unsigned long flags,
struct rio_dma_ext *rio_ext)
{
if (!chan || !chan->device || !chan->device->device_prep_slave_sg)
return NULL;
return chan->device->device_prep_slave_sg(chan, sgl, sg_len,
dir, flags, rio_ext);
}
In mobi_dma.c:
static struct dma_async_tx_descriptor *rab_prep_rio_sg(struct dma_chan *dchan,
struct scatterlist *sgl, unsigned int sg_len,
enum dma_transfer_direction dir, unsigned long flags,
void *tinfo)
{
struct rab_dma_chan *rab_dchan = to_rab_chan(dchan);
struct rab_tx_desc *desc = NULL;
struct rio_dma_ext *rext = tinfo;
struct dma_async_tx_descriptor *txd = NULL;
....
rab_dchan->dir = dir;
.....
spin_lock_bh(&rab_dchan->lock);
if (!list_empty(&rab_dchan->free_list)) {
desc = list_first_entry(&rab_dchan->free_list,
struct rab_tx_desc, desc_node);
list_del_init(&desc->desc_node);
desc->destid = rext->destid;
desc->rio_addr = rext->rio_addr;
desc->rio_addr_u = 0;
desc->sg_len = sg_len;
desc->sg = sgl;
txd = &desc->txd;
txd->flags = flags;
}
spin_unlock_bh(&rab_dchan->lock);
if (!txd) {
dev_dbg(&dchan->dev->device,
"DMAC%d free TXD is not available", rab_dchan->id);
return ERR_PTR(-EBUSY);
}
return txd;
}
dmaengine_submit: kicking off the transfer
Defined in linux/dmaengine.h:
static inline dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc)
{
return desc->tx_submit(desc);
}
The descriptors were initialized in rab_alloc_chan_resources in mobi_dma.c, so the submit actually calls rab_tx_submit:
desc[i].txd.tx_submit = rab_tx_submit;
In mobi_dma.c:
static dma_cookie_t rab_tx_submit(struct dma_async_tx_descriptor *txd)
{
struct rab_tx_desc *desc = to_rab_tx_desc(txd);
struct rab_dma_chan *dma_chan = to_rab_chan(txd->chan);
dma_cookie_t cookie;
/* Check if the descriptor is detached from any lists */
if (!list_empty(&desc->desc_node)) {
dev_err(&dma_chan->dchan.dev->device,
"DMAC%d wrong state of descriptor %p",
dma_chan->id, txd);
return -EIO;
}
spin_lock_bh(&dma_chan->lock);
if (!dma_chan->active) {
spin_unlock_bh(&dma_chan->lock);
return -ENODEV;
}
cookie = dma_cookie_assign(txd);
desc->status = DMA_IN_PROGRESS;
list_add_tail(&desc->desc_node, &dma_chan->queue);
rab_advance_work(dma_chan, NULL);
spin_unlock_bh(&dma_chan->lock);
return cookie;
}
In mobi_dma.c:
static void rab_advance_work(struct rab_dma_chan *rab_dchan,
struct rab_tx_desc *desc)
{
int err = 0;
dev_dbg(&rab_dchan->dchan.dev->device, "DMAC%d", rab_dchan->id);
if (!rab_dma_is_idle(rab_dchan))
return;
....
if (!desc && !rab_dchan->active_tx && !list_empty(&rab_dchan->queue)) {
desc = list_first_entry(&rab_dchan->queue,
struct rab_tx_desc, desc_node);
list_del_init((&desc->desc_node));
rab_dchan->active_tx = desc;
}
if (desc) {
err = rab_submit_sg(desc);
if (!err) {
rab_start_dma(rab_dchan);
} else {
rab_dma_tx_err(rab_dchan, desc);
.....
}
}
.....
}
rab_submit_sg in mobi_dma.c: because memory allocated with malloc in the application is not necessarily contiguous, an sg entry may cover a single page or several contiguous pages. Since the IP's DMA can move at most 1 MB per descriptor, rab_desc_fill_init (called from rab_submit_sg) splits the data according to how contiguous it is: any chunk larger than 1 MB is broken into several sub-1 MB descriptors. Note that next_addr is the physical address of the descriptor as seen on the AXI bus, while bd_ptr is the corresponding kernel virtual address.
/* Must be called with the channel spinlock held */
static int rab_submit_sg(struct rab_tx_desc *desc)
{
struct dma_chan *chan = desc->txd.chan;
struct rab_dma_chan *rab_dchan = to_rab_chan(chan);
u32 sys_size = 0;
u64 rio_addr = 0;
dma_addr_t next_addr;
struct scatterlist *sg = desc->sg;
unsigned int i = 0;
int fill_dma_return=0;
struct rab_dma_desc *bd_ptr = NULL;
u32 idx = 0;
struct device *ch_dev = &chan->dev->device;
if (!rab_dma_is_idle(rab_dchan)) {
.....
return -EIO;
}
/*
* Fill DMA channel's hardware buffer descriptors.
* (NOTE: RapidIO destination address is limited to 64 bits for now)
*/
rio_addr = desc->rio_addr;
sys_size = dma_to_mport(chan->device)->sys_size;
idx = 0;
/* Initialize DMA descriptor */
for_each_sg(desc->sg, sg, desc->sg_len, i) {
.......
bd_ptr = &((struct rab_dma_desc *)rab_dchan->bd_base)[idx];
desc->rio_addr = rio_addr;
if (sg_is_last(sg)) {
fill_dma_return =rab_desc_fill_init(desc, bd_ptr, sg,next_addr,0);
if (fill_dma_return<0) {
.....
return fill_dma_return;
}
.....
rab_dchan->id, bd_ptr, desc->destid, desc->rio_addr);
desc->sg_len = 0;
break;
}
next_addr = rab_dchan->bd_phys + (idx + 1) * 16;
......
fill_dma_return = rab_desc_fill_init(desc, bd_ptr, sg, next_addr,1);
if (fill_dma_return<0) {
.....
return fill_dma_return;
}
.....
idx+=fill_dma_return;
if(idx>dma_queue_sz){
......
return -EINVAL;
}
rio_addr += sg_dma_len(sg);
}
.....
return 0;
}
rab_desc_fill_init in mobi_dma.c fills in the lowest-level hardware DMA descriptors that drive the transfer.
static int rab_desc_fill_init(struct rab_tx_desc *desc,
struct rab_dma_desc *bd_ptr,
struct scatterlist *sg,
dma_addr_t next_addr,
char flag)
{
struct dma_chan *chan = desc->txd.chan;
struct rab_dma_chan *rab_dchan = to_rab_chan(chan);
unsigned int sg_len=sg_dma_len(sg);
unsigned int dma_len;
int desc_count=0,move_len;
if (!bd_ptr)
return -EINVAL;
while(sg_len){
if(sg_len>RAB_DMA_MAX_TRANS_BYTES){//chunks larger than 1 MB must be split
dma_len=RAB_DMA_MAX_TRANS_BYTES;
sg_len-=RAB_DMA_MAX_TRANS_BYTES;
}
else{
dma_len=sg_len;
sg_len=0;
}
/* Initialize DMA descriptor */
bd_ptr->rab_dma_desc_ctl = RAB_DMA_DESC_VALID | RAB_DMA_DESC_NEXT_VALID |(dma_len >> 2) << 5;
move_len=RAB_DMA_MAX_TRANS_BYTES*desc_count;//when a chunk has been split, the source and destination addresses must be offset accordingly
if (rab_dchan->dir == DMA_MEM_TO_DEV) {
bd_ptr->rab_dma_desc_src_addr = (sg_dma_address(sg)+ move_len)>> 2;
bd_ptr->rab_dma_desc_dest_addr = (desc->rio_addr+ move_len )>> 2;
} else {
bd_ptr->rab_dma_desc_src_addr = (desc->rio_addr+ move_len) >> 2;
bd_ptr->rab_dma_desc_dest_addr = (sg_dma_address(sg)+ move_len) >> 2;
}
if((sg_len==0)&&(flag==0)){//for the last sub-descriptor of the last sg entry, set the next-descriptor address to 0 to terminate the RapidIO transfer chain
bd_ptr->rab_dma_desc_next_addr = 0;
}
else{
bd_ptr->rab_dma_desc_next_addr = ((unsigned int)next_addr+16*desc_count)>> 2;
}
.....
bd_ptr++;
desc_count++;
}
return desc_count;
}
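As a concrete example of the loop above (made-up addresses): a single contiguous sg entry of 2.5 MB with sg_dma_address = 0x40000000, desc->rio_addr = 0x80000000 and direction DMA_MEM_TO_DEV produces three hardware descriptors.
/* Illustrative result of rab_desc_fill_init() for the example above:
 *
 *   piece  dma_len    src written (>>2 of)  dest written (>>2 of)  next_addr field
 *   0      0x100000   0x40000000            0x80000000             next_addr >> 2
 *   1      0x100000   0x40100000            0x80100000             (next_addr + 16) >> 2
 *   2      0x080000   0x40200000            0x80200000             0 when this is the last
 *                                                                  sg entry (flag == 0)
 *
 * The function returns desc_count = 3; rab_submit_sg() then advances idx by 3
 * and fails with -EINVAL if idx would exceed dma_queue_sz. */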
rab_start_dma in mobi_dma.c performs the lowest-level DMA register writes that actually start the transfer.
/* Must be called with the spinlock holding */
static void rab_start_dma(struct rab_dma_chan *rab_dchan)
{
.........
}