Linux内核统一使用dmaengine框架来管理DMA,尚不熟悉的读者可以先阅读以下链接:
https://blog.csdn.net/were0415/article/details/54095899
现以视频输出为例进一步分析:
1.FPGA中例化了一个Frame Buffer Read IP(v_frmbuf_rd),其设备树节点如下所示(其中reg描述的是该IP的寄存器空间,驱动probe时通过devm_ioremap_resource映射):
/*
 * Frame buffer read DMA controller node. The properties commented below are
 * the ones the probe routine shown later in this document reads.
 */
VideoOut_1ch_v_frmbuf_rd_0: v_frmbuf_rd@b0000000 {
/* DMA clients reference this node with a one-cell specifier, e.g. dmas = <&label 0> */
#dma-cells = <1>;
clock-names = "ap_clk";
clocks = <&clk 72>;
compatible = "xlnx,v-frmbuf-rd-2.1", "xlnx,axi-frmbuf-rd-v2.1";
interrupt-names = "interrupt";
interrupt-parent = <&gic>;
interrupts = <0 105 4>;
/* Register window; probe maps memory resource 0 with devm_ioremap_resource() */
reg = <0x0 0xb0000000 0x0 0x10000>;
/* Requested in probe as the "reset" GPIO (initially high, then driven to 0) */
reset-gpios = <&gpio 81 1>;
xlnx,dma-addr-width = <64>;
/* Optional; probe requires a power of two not smaller than ppc * XILINX_FRMBUF_ALIGN_MUL */
xlnx,dma-align = <8>;
/* Optional; probe range-checks these against XILINX_FRMBUF_{MIN,MAX}_{HEIGHT,WIDTH} */
xlnx,max-height = <2160>;
xlnx,max-width = <3840>;
/* Probe accepts only 1, 2 or 4 */
xlnx,pixels-per-clock = <1>;
xlnx,s-axi-ctrl-addr-width = <0x7>;
xlnx,s-axi-ctrl-data-width = <0x20>;
/* Each name is compared with the dts_name entries of the driver's format table */
xlnx,vid-formats = "yuyv", "nv12", "nv16";
xlnx,video-width = <8>;
};
在驱动中会注册DMA engine,调用函数of_dma_controller_register;
/**
 * xilinx_frmbuf_probe - Set up one framebuffer DMA device and register it
 * @pdev: platform device instantiated from the matching device-tree node
 *
 * Reads the device-tree configuration (reset GPIO, register window, size
 * limits, pixels per clock, alignment, video formats), probes the single DMA
 * channel and registers the device with the dmaengine core and with the OF
 * DMA helpers so that clients can look the channel up through "dmas".
 *
 * Return: 0 on success, a negative errno otherwise. Every failure after the
 * channel has been probed releases the channel again.
 */
static int xilinx_frmbuf_probe(struct platform_device *pdev)
{
	struct device_node *node = pdev->dev.of_node;
	struct xilinx_frmbuf_device *xdev;
	struct resource *io;
	enum dma_transfer_direction dma_dir;
	const struct of_device_id *match;
	int err;
	u32 i, j, align, ppc;
	int hw_vid_fmt_cnt;
	const char *vid_fmts[ARRAY_SIZE(xilinx_frmbuf_formats)];

	xdev = devm_kzalloc(&pdev->dev, sizeof(*xdev), GFP_KERNEL);
	if (!xdev)
		return -ENOMEM;

	xdev->dev = &pdev->dev;

	match = of_match_node(xilinx_frmbuf_of_ids, node);
	if (!match)
		return -ENODEV;

	xdev->cfg = match->data;
	dma_dir = (enum dma_transfer_direction)xdev->cfg->direction;

	xdev->rst_gpio = devm_gpiod_get(&pdev->dev, "reset",
					GPIOD_OUT_HIGH);
	if (IS_ERR(xdev->rst_gpio)) {
		err = PTR_ERR(xdev->rst_gpio);
		if (err == -EPROBE_DEFER)
			dev_info(&pdev->dev,
				 "Probe deferred due to GPIO reset defer\n");
		else
			dev_err(&pdev->dev,
				"Unable to locate reset property in dt\n");
		return err;
	}

	gpiod_set_value_cansleep(xdev->rst_gpio, 0x0);

	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	xdev->regs = devm_ioremap_resource(&pdev->dev, io);
	if (IS_ERR(xdev->regs))
		return PTR_ERR(xdev->regs);

	/* A missing size property falls back to the driver maximum */
	err = of_property_read_u32(node, "xlnx,max-height", &xdev->max_height);
	if (err < 0) {
		xdev->max_height = XILINX_FRMBUF_MAX_HEIGHT;
	} else if (xdev->max_height > XILINX_FRMBUF_MAX_HEIGHT ||
		   xdev->max_height < XILINX_FRMBUF_MIN_HEIGHT) {
		dev_err(&pdev->dev, "Invalid height in dt");
		return -EINVAL;
	}

	err = of_property_read_u32(node, "xlnx,max-width", &xdev->max_width);
	if (err < 0) {
		xdev->max_width = XILINX_FRMBUF_MAX_WIDTH;
	} else if (xdev->max_width > XILINX_FRMBUF_MAX_WIDTH ||
		   xdev->max_width < XILINX_FRMBUF_MIN_WIDTH) {
		dev_err(&pdev->dev, "Invalid width in dt");
		return -EINVAL;
	}

	/* Initialize the DMA engine */
	if (xdev->cfg->flags & XILINX_PPC_PROP) {
		err = of_property_read_u32(node, "xlnx,pixels-per-clock", &ppc);
		if (err || (ppc != 1 && ppc != 2 && ppc != 4)) {
			dev_err(&pdev->dev, "missing or invalid pixels per clock dts prop\n");
			/*
			 * err is 0 when the property exists but holds an
			 * unsupported value; never report that as success.
			 */
			return err ? err : -EINVAL;
		}

		err = of_property_read_u32(node, "xlnx,dma-align", &align);
		if (err)
			align = ppc * XILINX_FRMBUF_ALIGN_MUL;

		/* ffs() == fls() holds only when exactly one bit is set */
		if (align < (ppc * XILINX_FRMBUF_ALIGN_MUL) ||
		    ffs(align) != fls(align)) {
			dev_err(&pdev->dev, "invalid dma align dts prop\n");
			return -EINVAL;
		}
	} else {
		align = 16;
	}

	xdev->common.copy_align = fls(align) - 1;
	xdev->common.dev = &pdev->dev;

	INIT_LIST_HEAD(&xdev->common.channels);
	dma_cap_set(DMA_SLAVE, xdev->common.cap_mask);
	dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask);

	/* Initialize the channels */
	err = xilinx_frmbuf_chan_probe(xdev, node);
	if (err < 0)
		return err;

	/* From here on every failure must undo the channel probe */
	xdev->chan.direction = dma_dir;

	if (xdev->chan.direction == DMA_DEV_TO_MEM) {
		xdev->common.directions = BIT(DMA_DEV_TO_MEM);
		dev_info(&pdev->dev, "Xilinx AXI frmbuf DMA_DEV_TO_MEM\n");
	} else if (xdev->chan.direction == DMA_MEM_TO_DEV) {
		xdev->common.directions = BIT(DMA_MEM_TO_DEV);
		dev_info(&pdev->dev, "Xilinx AXI frmbuf DMA_MEM_TO_DEV\n");
	} else {
		err = -EINVAL;
		goto remove_chan;
	}

	/*
	 * Read supported video formats and update internal table. The count
	 * is validated first because vid_fmts[] lives on the stack and has
	 * room for ARRAY_SIZE(xilinx_frmbuf_formats) entries only.
	 */
	hw_vid_fmt_cnt = of_property_count_strings(node, "xlnx,vid-formats");
	if (hw_vid_fmt_cnt < 0)
		err = hw_vid_fmt_cnt;
	else if (hw_vid_fmt_cnt > (int)ARRAY_SIZE(vid_fmts))
		err = -EINVAL;
	else
		err = of_property_read_string_array(node, "xlnx,vid-formats",
						    vid_fmts, hw_vid_fmt_cnt);
	if (err < 0) {
		dev_err(&pdev->dev,
			"Missing or invalid xlnx,vid-formats dts prop\n");
		goto remove_chan;
	}

	for (i = 0; i < hw_vid_fmt_cnt; i++) {
		const char *vid_fmt_name = vid_fmts[i];

		for (j = 0; j < ARRAY_SIZE(xilinx_frmbuf_formats); j++) {
			const char *dts_name =
				xilinx_frmbuf_formats[j].dts_name;

			if (strcmp(vid_fmt_name, dts_name))
				continue;

			xdev->enabled_vid_fmts |=
				xilinx_frmbuf_formats[j].fmt_bitmask;
		}
	}

	/* Determine supported vid framework formats */
	frmbuf_init_format_array(xdev);

	xdev->common.device_alloc_chan_resources =
				xilinx_frmbuf_alloc_chan_resources;
	xdev->common.device_free_chan_resources =
				xilinx_frmbuf_free_chan_resources;
	xdev->common.device_prep_interleaved_dma =
				xilinx_frmbuf_dma_prep_interleaved;
	xdev->common.device_terminate_all = xilinx_frmbuf_terminate_all;
	xdev->common.device_synchronize = xilinx_frmbuf_synchronize;
	xdev->common.device_tx_status = xilinx_frmbuf_tx_status;
	xdev->common.device_issue_pending = xilinx_frmbuf_issue_pending;

	platform_set_drvdata(pdev, xdev);

	/* Register the DMA engine with the core */
	err = dma_async_device_register(&xdev->common);
	if (err < 0) {
		dev_err(&pdev->dev, "Unable to register DMA engine\n");
		goto remove_chan;
	}

	/* Make the channel reachable through the "dmas" property of clients */
	err = of_dma_controller_register(node, of_dma_xilinx_xlate, xdev);
	if (err < 0) {
		dev_err(&pdev->dev, "Unable to register DMA to DT\n");
		dma_async_device_unregister(&xdev->common);
		goto remove_chan;
	}

	return 0;

remove_chan:
	xilinx_frmbuf_chan_remove(&xdev->chan);
	return err;
}
/*
 * Platform driver glue: binds xilinx_frmbuf_probe()/xilinx_frmbuf_remove()
 * to device-tree nodes listed in xilinx_frmbuf_of_ids.
 */
static struct platform_driver xilinx_frmbuf_driver = {
	.probe = xilinx_frmbuf_probe,
	.remove = xilinx_frmbuf_remove,
	.driver = {
		.of_match_table = xilinx_frmbuf_of_ids,
		.name = "xilinx-frmbuf",
	},
};

/* Expands to the module init/exit boilerplate registering the driver */
module_platform_driver(xilinx_frmbuf_driver);
2.DMA Engine API编程
slave DMA用法包括以下的步骤:
1. 分配一个DMA slave通道;
2. 设置slave和controller特定的参数;
3. 获取一个传输描述符;
4. 提交传输描述符;
5. 发起等待的请求并等待回调通知。
首先驱动就是先分配一个DMA slave通道;
/*
 * PL display (DMA client) node. The probe routine shown below requests the
 * DMA channel named "dma0" and reads xlnx,vformat and xlnx,bridge.
 */
v_drm_dmaengine_drv111: drm-dmaengine-drv111 {
compatible = "xlnx,pl-disp";
/* Phandle of the frame buffer read DMA node plus its one-cell specifier */
dmas = <&VideoOut_1ch_v_frmbuf_rd_0 0>;
/* Name passed to of_dma_request_slave_channel() in probe */
dma-names = "dma0";
xlnx,vformat = "YUYV"; /* uppercase */
/* Optional; probe only logs an info message when this phandle is absent */
xlnx,bridge = <&VideoOut_1ch_v_tc_0>;
#address-cells = <1>;
#size-cells = <0>;
dmaengine_lcd_port: port@0 {
reg = <0>;
lcd_dmaengine_crtc: endpoint {
remote-endpoint = <&lcd_encoder>;
};
};
};
static int xlnx_pl_disp_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct device_node *vtc_node;
struct xlnx_pl_disp *xlnx_pl_disp;
int ret;
const char *vformat;
struct dma_chan *dma_chan;
struct xlnx_dma_chan *xlnx_dma_chan;
xlnx_pl_disp = devm_kzalloc(dev, sizeof(*xlnx_pl_disp), GFP_KERNEL);
if (!xlnx_pl_disp)
return -ENOMEM;
//请求分配dma通道
dma_chan = of_dma_request_slave_channel(dev->of_node, "dma0");
if (IS_ERR_OR_NULL(dma_chan)) {
dev_err(dev, "failed to request dma channel\n");
return PTR_ERR(dma_chan);
}
xlnx_dma_chan = devm_kzalloc(dev, sizeof(*xlnx_dma_chan), GFP_KERNEL);
if (!xlnx_dma_chan)
return -ENOMEM;
xlnx_dma_chan->dma_chan = dma_chan;
xlnx_pl_disp->chan = xlnx_dma_chan;
ret = of_property_read_string(dev->of_node, "xlnx,vformat", &vformat);
if (ret) {
dev_err(dev, "No xlnx,vformat value in dts\n");
goto err_dma;
}
strcpy((char *)&xlnx_pl_disp->fmt, vformat);
printk("+++++++++++vformat: %s, xlnx_pl_disp->fmt: 0x%x\n", vformat,xlnx_pl_disp->fmt);
/* VTC Bridge support */
vtc_node = of_parse_phandle(dev->of_node, "xlnx,bridge", 0);
printk("++++++++++++++++++++++++++vtc_node:%p\n", vtc_node);
if (vtc_node) {
xlnx_pl_disp->vtc_bridge = of_xlnx_bridge_get(vtc_node);
if (!xlnx_pl_disp->vtc_bridge) {
dev_info(dev, "Didn't get vtc bridge instance\n");
return -EPROBE_DEFER;
}
} else {
dev_info(dev, "vtc bridge property not present\n");
}
xlnx_pl_disp->dev = dev;
platform_set_drvdata(pdev, xlnx_pl_disp);
ret = component_add(dev, &xlnx_pl_disp_component_ops);
if (ret)
goto err_dma;
xlnx_pl_disp->master = xlnx_drm_pipeline_init(pdev);
if (IS_ERR(xlnx_pl_disp->master)) {
ret = PTR_ERR(xlnx_pl_disp->master);
dev_err(dev, "failed to initialize the drm pipeline\n");
goto err_component;
}
dev_info(&pdev->dev, "Xlnx PL display driver probed\n");
return 0;
err_component:
component_del(dev, &xlnx_pl_disp_component_ops);
err_dma:
dma_release_channel(xlnx_pl_disp->chan->dma_chan);
return ret;
}
2. 设置slave和controller特定的参数; 这个在上层mode set时候会调用到此处,配置参数;
/**
 * xlnx_pl_disp_plane_mode_set - Fill the interleaved DMA template for a plane
 * @plane: DRM plane being configured
 * @fb: framebuffer providing the pixel data
 * @crtc_x: unused here
 * @crtc_y: unused here
 * @crtc_w: unused here
 * @crtc_h: unused here
 * @src_x: unused here
 * @src_y: unused here
 * @src_w: source width in pixels; determines the bytes transferred per line
 * @src_h: source height in lines; becomes the number of frames (numf)
 *
 * Step 2 of the slave-DMA usage sequence. This determines the amount of data
 * to transfer, e.g. for 1280x720 YUYV: 1280 * 2 bytes per line, 720 lines.
 *
 * Return: 0 on success, -EINVAL for formats with more than two planes or
 * when a plane address cannot be obtained.
 */
static int xlnx_pl_disp_plane_mode_set(struct drm_plane *plane,
				       struct drm_framebuffer *fb,
				       int crtc_x, int crtc_y,
				       unsigned int crtc_w, unsigned int crtc_h,
				       u32 src_x, uint32_t src_y,
				       u32 src_w, uint32_t src_h)
{
	struct xlnx_pl_disp *xlnx_pl_disp = plane_to_dma(plane);
	const struct drm_format_info *info = fb->format;
	dma_addr_t luma_paddr, chroma_paddr;
	size_t stride;
	struct xlnx_dma_chan *xlnx_dma_chan = xlnx_pl_disp->chan;

	printk("________________________________%s\n", __func__);

	if (info->num_planes > 2) {
		dev_err(xlnx_pl_disp->dev, "Color format not supported\n");
		return -EINVAL;
	}

	luma_paddr = drm_fb_cma_get_gem_addr(fb, plane->state, 0);
	if (!luma_paddr) {
		dev_err(xlnx_pl_disp->dev, "failed to get luma paddr\n");
		return -EINVAL;
	}

	/*
	 * dma_addr_t may be wider than unsigned int, so widen explicitly
	 * instead of passing it to %x.
	 */
	printk("____________________________luma_paddr = 0x%llx\n",
	       (unsigned long long)luma_paddr);
	dev_dbg(xlnx_pl_disp->dev, "num planes = %d\n", info->num_planes);

	/* One "frame" of the template per line: size bytes, then icg padding */
	xlnx_dma_chan->xt.numf = src_h;
	xlnx_dma_chan->sgl[0].size = drm_format_plane_width_bytes(info,
								   0, src_w);
	xlnx_dma_chan->sgl[0].icg = fb->pitches[0] - xlnx_dma_chan->sgl[0].size;
	xlnx_dma_chan->xt.src_start = luma_paddr;
	xlnx_dma_chan->xt.frame_size = info->num_planes;
	xlnx_dma_chan->xt.dir = DMA_MEM_TO_DEV;
	xlnx_dma_chan->xt.src_sgl = true;
	xlnx_dma_chan->xt.dst_sgl = false;

	/* Do we have a video format aware dma channel?
	 * If so, modify descriptor accordingly. Heuristic test:
	 * we have a multi-plane format but only one dma channel
	 */
	if (info->num_planes > 1) {
		chroma_paddr = drm_fb_cma_get_gem_addr(fb, plane->state, 1);
		if (!chroma_paddr) {
			dev_err(xlnx_pl_disp->dev,
				"failed to get chroma paddr\n");
			return -EINVAL;
		}
		stride = xlnx_dma_chan->sgl[0].size +
			xlnx_dma_chan->sgl[0].icg;
		/* Gap between the end of the luma plane and the chroma plane */
		xlnx_dma_chan->sgl[0].src_icg = chroma_paddr -
			xlnx_dma_chan->xt.src_start -
			(xlnx_dma_chan->xt.numf * stride);
	}

	return 0;
}
static void xlnx_pl_disp_plane_atomic_update(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
int ret;
struct xlnx_pl_disp *xlnx_pl_disp = plane_to_dma(plane);
printk("________________________________%s\n", __func__);
ret = xlnx_pl_disp_plane_mode_set(plane,
plane->state->fb,
plane->state->crtc_x,
plane->state->crtc_y,
plane->state->crtc_w,
plane->state->crtc_h,
plane->state->src_x >> 16,
plane->state->src_y >> 16,
plane->state->src_w >> 16,
plane->state->src_h >> 16);
if (ret) {
dev_err(xlnx_pl_disp->dev, "failed to mode set a plane\n");
return;
}
/* in case frame buffer is used set the color format */
xilinx_xdma_drm_config(xlnx_pl_disp->chan->dma_chan,
xlnx_pl_disp->plane.state->fb->format->format);
/* apply the new fb addr and enable */
xlnx_pl_disp_plane_enable(plane);
}
/* Plane helper callbacks invoked by the DRM atomic helpers */
static const struct drm_plane_helper_funcs xlnx_pl_disp_plane_helper_funcs = {
	.atomic_disable = xlnx_pl_disp_plane_atomic_disable,
	.atomic_update = xlnx_pl_disp_plane_atomic_update,
};
3. 获取一个传输描述符;
4. 提交传输描述符;
/**
* xlnx_pl_disp_plane_enable - Enables DRM plane
* @plane: DRM plane object
*
* Enable the DRM plane, by enabling the corresponding DMA
*/
static void xlnx_pl_disp_plane_enable(struct drm_plane *plane)
{
struct xlnx_pl_disp *xlnx_pl_disp = plane_to_dma(plane);
struct dma_async_tx_descriptor *desc;
enum dma_ctrl_flags flags;
struct xlnx_dma_chan *xlnx_dma_chan = xlnx_pl_disp->chan;
struct dma_chan *dma_chan = xlnx_dma_chan->dma_chan;
struct dma_interleaved_template *xt = &xlnx_dma_chan->xt;
printk("________________________________%s\n", __func__);
flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
//获取dma描述符
desc = dmaengine_prep_interleaved_dma(dma_chan, xt, flags);
if (!desc) {
dev_err(xlnx_pl_disp->dev,
"failed to prepare DMA descriptor\n");
return;
}
desc->callback = xlnx_pl_disp->callback;
desc->callback_param = xlnx_pl_disp->callback_param;
xilinx_xdma_set_earlycb(xlnx_dma_chan->dma_chan, desc, true);
if (plane->state->fb->flags == DRM_MODE_FB_ALTERNATE_TOP ||
plane->state->fb->flags == DRM_MODE_FB_ALTERNATE_BOTTOM) {
if (plane->state->fb->flags == DRM_MODE_FB_ALTERNATE_TOP)
xlnx_pl_disp->fid = 1;
else
xlnx_pl_disp->fid = 0;
xilinx_xdma_set_fid(xlnx_dma_chan->dma_chan, desc,
xlnx_pl_disp->fid);
}
dmaengine_submit(desc);
dma_async_issue_pending(xlnx_dma_chan->dma_chan);
}
5. 发起等待的请求并等待回调通知。
dma_async_issue_pending(xlnx_dma_chan->dma_chan);
回调的就是之前显存注册那边的函数:
xdev->common.device_issue_pending = xilinx_frmbuf_issue_pending;
/**
 * xilinx_frmbuf_issue_pending - device_issue_pending hook of the controller
 * @dchan: generic DMA channel handed in by the dmaengine core
 *
 * Calls xilinx_frmbuf_start_transfer() for the channel while holding the
 * channel lock with interrupts disabled.
 */
static void xilinx_frmbuf_issue_pending(struct dma_chan *dchan)
{
	struct xilinx_frmbuf_chan *frmbuf_chan = to_xilinx_chan(dchan);
	unsigned long irq_flags;

	spin_lock_irqsave(&frmbuf_chan->lock, irq_flags);
	xilinx_frmbuf_start_transfer(frmbuf_chan);
	spin_unlock_irqrestore(&frmbuf_chan->lock, irq_flags);
}
/**
 * xilinx_frmbuf_start_transfer - Starts frmbuf transfer
 * @chan: Driver specific channel struct pointer
 *
 * Does nothing unless the channel is idle. A previously staged descriptor is
 * promoted to the active one; then, if the pending list is not empty, its
 * first descriptor is programmed into the hardware registers, the hardware
 * is started and the descriptor is removed from the pending list.
 *
 * The visible caller, xilinx_frmbuf_issue_pending(), holds chan->lock.
 */
static void xilinx_frmbuf_start_transfer(struct xilinx_frmbuf_chan *chan)
{
	struct xilinx_frmbuf_tx_descriptor *desc;

	if (!chan->idle)
		return;

	if (chan->staged_desc) {
		chan->active_desc = chan->staged_desc;
		chan->staged_desc = NULL;
	}

	if (list_empty(&chan->pending_list))
		return;

	desc = list_first_entry(&chan->pending_list,
				struct xilinx_frmbuf_tx_descriptor,
				node);

	/*
	 * Widen explicitly so the format matches the argument whatever the
	 * width of the address field is (it may be a 64-bit DMA address).
	 */
	printk("xilinx_frmbuf_start_transfer:desc->hw.luma_plane_addr = 0x%llx\n",
	       (unsigned long long)desc->hw.luma_plane_addr);

	/* Start the transfer */
	chan->write_addr(chan, XILINX_FRMBUF_ADDR_OFFSET,
			 desc->hw.luma_plane_addr);
	chan->write_addr(chan, XILINX_FRMBUF_ADDR2_OFFSET,
			 desc->hw.chroma_plane_addr);

	/* HW expects these parameters to be same for one transaction */
	frmbuf_write(chan, XILINX_FRMBUF_WIDTH_OFFSET, desc->hw.hsize);
	frmbuf_write(chan, XILINX_FRMBUF_STRIDE_OFFSET, desc->hw.stride);
	frmbuf_write(chan, XILINX_FRMBUF_HEIGHT_OFFSET, desc->hw.vsize);
	frmbuf_write(chan, XILINX_FRMBUF_FMT_OFFSET, chan->vid_fmt->id);

	/* If it is framebuffer read IP set the FID */
	if (chan->direction == DMA_MEM_TO_DEV && chan->hw_fid)
		frmbuf_write(chan, XILINX_FRMBUF_FID_OFFSET, desc->fid);

	/* Start the hardware */
	xilinx_frmbuf_start(chan);
	list_del(&desc->node);

	/* No staging descriptor required when auto restart is disabled */
	if (chan->mode == AUTO_RESTART)
		chan->staged_desc = desc;
	else
		chan->active_desc = desc;
}
这样dma通道就开启。
看看应用层如何操作:
static int drm_buffer_create(struct drm_device *drm_dev, unsigned int index)
{
int i,ret;
struct drm_mode_create_dumb creq;
struct drm_prime_handle prime;
struct drm_buffer *buf = &drm_dev->d_buff[index];
buf->index = index;
memset(&creq, 0, sizeof(creq));
creq.width = drm_dev->width;
creq.height = drm_dev->height;
creq.bpp = BYTES_PER_PIXEL * 8;
creq.flags = 0;
ret = drmIoctl(drm_dev->fd, DRM_IOCTL_MODE_CREATE_DUMB, &creq);
if (ret){
printf("create dumb failed!\n");
return -1;
}
uint32_t offsets[4] = { 0, 0, 0, 0 };
uint32_t pitches[4] = { 0, 0, 0, 0 };
// uint32_t bo_handles[4] = { 0, 0, 0, 0 };
uint32_t stride = creq.pitch;
printf("stride = %d\n", stride);
memset(&prime, 0, sizeof prime);
prime.handle = creq.handle;
/* Export GEM object to a FD */
ret = ioctl(drm_dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &prime);
if (ret) {
printf("PRIME_HANDLE_TO_FD failed.\n");
return -1;
}
//get buf info
i = 0;
buf->num_planes = 1;
buf->dmabuf_fd[i] = prime.fd;
buf->offsets[i]= 0;
buf->lengths[i]= stride * drm_dev->height;
buf->dumb_buff_length[i] = creq.size;
pitches[0] = stride;
offsets[0] = 0;
buf->bo_handle[0] = creq.handle;
//使用缓存的handel创建一个FB,返回fb的id:fb_handle。
ret = drmModeAddFB2(drm_dev->fd, drm_dev->width, drm_dev->height, drm_dev->format, &buf->bo_handle[0], \
pitches, offsets, &buf->fb_handle, 0);
if (ret){
printf("failed to create fb\n");
return -1;
}
struct drm_mode_map_dumb mreq; //请求映射缓存到内存。
mreq.handle = creq.handle;
ret = drmIoctl(drm_dev->fd, DRM_IOCTL_MODE_MAP_DUMB, &mreq);
if (ret){
printf("map dumb failed!\n");
}
// 猜测:创建的缓存位于显存上,在使用之前先使用drm_mode_map_dumb将其映射到内存空间。
// 但是映射后缓存位于内核内存空间,还需要一次mmap才能被程序使用。
buf->drm_buff[i] = mmap(0, creq.size, PROT_READ | PROT_WRITE, MAP_SHARED, drm_dev->fd, mreq.offset);
if (buf->drm_buff[i] == MAP_FAILED){
printf("mmap failed!\n");
}
printf("=====================================================================\n");
//一切准备完毕,只差连接在一起了!
ret = drmModeSetCrtc(drm_dev->fd, drm_dev->crtc_id, buf->fb_handle, 0, 0, &drm_dev->connector->connector_id, 1, drm_dev->connector->modes);
printf("ret = %d, drm_dev->connector->modes->clock = %d\n", ret,drm_dev->connector->modes->clock);
// ret = drmModeSetPlane(drm_dev->fd, drm_dev->plane_id, drm_dev->crtc_id, buf->fb_handle, 0, 0, 0,
// drm_dev->width, drm_dev->height,0, 0, drm_dev->width << 16, drm_dev->height << 16);
return 0;
}
通过drmModeSetCrtc函数可调用到底层的xlnx_pl_disp_plane_atomic_update(具体内容可以看上面的源码),并与以上几步DMA操作一一对应;这里输入的fb_handle应该就是数据源,那目标就是显存,数据通过dmac传输。
比如分配多个buf,则内核打印调试信息:
____________________________luma_paddr = 0x70500000
____________________________luma_paddr = 0x70300000
____________________________luma_paddr = 0x70100000