1.pl.dtsi
/*
* CAUTION: This file is automatically generated by Xilinx.
*/
/ {
/* Container bus for all PL (programmable-logic) peripherals. */
amba_pl: amba_pl@0 {
#address-cells = <2>;
#size-cells = <2>;
compatible = "simple-bus";
ranges ;
/* AXI DMA used by the test driver: MM2S (TX, IRQ 92) and S2MM (RX, IRQ 93)
 * channels, scatter-gather enabled, 0x14-bit transfer-length field. */
axi_dma_0: dma@80040000 {
#dma-cells = <1>;
clock-names = "s_axi_lite_aclk", "m_axi_sg_aclk", "m_axi_mm2s_aclk", "m_axi_s2mm_aclk";
clocks = <&zynqmp_clk 71>, <&zynqmp_clk 71>, <&zynqmp_clk 71>, <&zynqmp_clk 71>;
compatible = "xlnx,axi-dma-7.1", "xlnx,axi-dma-1.00.a";
interrupt-names = "mm2s_introut", "s2mm_introut";
interrupt-parent = <&gic>;
interrupts = <0 92 4 0 93 4>;
reg = <0x0 0x80040000 0x0 0x10000>;
xlnx,addrwidth = <0x20>;
xlnx,include-sg ;
xlnx,sg-length-width = <0x14>;
/* Memory-to-device (TX) half. */
dma-channel@80040000 {
compatible = "xlnx,axi-dma-mm2s-channel";
dma-channels = <0x1>;
interrupts = <0 92 4>;
xlnx,datawidth = <0x20>;
xlnx,device-id = <0x0>;
};
/* Device-to-memory (RX) half. */
dma-channel@80040030 {
compatible = "xlnx,axi-dma-s2mm-channel";
dma-channels = <0x1>;
interrupts = <0 93 4>;
xlnx,datawidth = <0x20>;
xlnx,device-id = <0x0>;
};
};
/* Dynamic clock generator IP (pixel clock for the video pipeline). */
axi_dynclk_0: axi_dynclk@80010000 {
clock-names = "REF_CLK_I", "s00_axi_aclk";
clocks = <&zynqmp_clk 71>, <&zynqmp_clk 71>;
compatible = "xlnx,axi-dynclk-1.0";
reg = <0x0 0x80010000 0x0 0x10000>;
xlnx,s00-axi-addr-width = <0x5>;
xlnx,s00-axi-data-width = <0x20>;
};
/* AXI VDMA with only the MM2S (read) path — framebuffer reader, IRQ 90. */
axi_vdma_0: dma@80020000 {
#dma-cells = <1>;
clock-names = "s_axi_lite_aclk", "m_axi_mm2s_aclk", "m_axis_mm2s_aclk";
clocks = <&zynqmp_clk 71>, <&zynqmp_clk 72>, <&zynqmp_clk 72>;
compatible = "xlnx,axi-vdma-6.3", "xlnx,axi-vdma-1.00.a";
interrupt-names = "mm2s_introut";
interrupt-parent = <&gic>;
interrupts = <0 90 4>;
reg = <0x0 0x80020000 0x0 0x10000>;
xlnx,addrwidth = <0x40>;
xlnx,flush-fsync = <0x1>;
xlnx,num-fstores = <0x3>;
dma-channel@80020000 {
compatible = "xlnx,axi-vdma-mm2s-channel";
interrupts = <0 90 4>;
xlnx,datawidth = <0x18>;
xlnx,device-id = <0x0>;
xlnx,include-dre ;
};
};
/* Video timing controller in generator mode, IRQ 89. */
v_tc_0: v_tc@80030000 {
clock-names = "clk", "s_axi_aclk";
clocks = <&misc_clk_0>, <&zynqmp_clk 71>;
compatible = "xlnx,v-tc-6.2", "xlnx,v-tc-6.1";
interrupt-names = "irq";
interrupt-parent = <&gic>;
interrupts = <0 89 4>;
reg = <0x0 0x80030000 0x0 0x10000>;
xlnx,generator ;
};
/* 100 MHz fixed reference clock consumed by the VTC above. */
misc_clk_0: misc_clk_0 {
#clock-cells = <0>;
clock-frequency = <100000000>;
compatible = "fixed-clock";
};
};
};
system-user.dtsi
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/pinctrl/pinctrl-zynqmp.h>
/include/ "system-conf.dtsi"
/ {
model = "Zynq MpSoc Development Board";
chosen {
/* Serial console on ttyPS0; rootfs on the second SD-card partition. */
bootargs = "earlycon console=ttyPS0,115200 clk_ignore_unused root=/dev/mmcblk1p2 rw rootwait";
stdout-path = "serial0:115200n8";
};
};
/* Test-client node the xilinx_axidmatest driver binds to: "axidma0" is the
 * MM2S (TX) channel and "axidma1" the S2MM (RX) channel of axi_dma_0. */
&amba_pl {
axidmatest_0: axidmatest@0 {
compatible ="xlnx,axi-dma-test-1.00.a";
dmas = <&axi_dma_0 0 &axi_dma_0 1>;
dma-names = "axidma0","axidma1";
};
};
/* SD */
&sdhci1 {
disable-wp;
no-1-8-v;
};
/* USB */
&dwc3_0 {
status = "okay";
dr_mode = "host";
};
/* Xilinx zocl DRM node (XRT runtime support). */
&amba {
zyxclmm_drm {
compatible = "xlnx,zocl";
status = "okay";
};
};
/* Goodix GT9xx touch controller at I2C address 0x5D, IRQ 91. */
&i2c0 {
status = "okay";
clock-frequency = <100000>;
xilinx_an071@5D {
compatible = "goodix,gt9xx";
reg = <0x5D>;
interrupt-parent = <&gic>;
interrupts = <0 91 4>;
};
};
2. Kernel driver (modified from Xilinx's axidmatest.c)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* XILINX AXI DMA and MCDMA Engine test module
*
* Copyright (C) 2010 Xilinx, Inc. All rights reserved.
*
* Based on Atmel DMA Test Client
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/of_dma.h>
#include <linux/platform_device.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/sched/task.h>
#include <linux/dma/xilinx_dma.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <asm/io.h>
#include <linux/platform_device.h>
#include <linux/miscdevice.h>
#include <linux/ioport.h>
#include <linux/of.h>
#include <linux/uaccess.h>
#include <asm/uaccess.h>
#include <linux/dma-mapping.h>
//#define MYDEBUG

/* Maximum number of bytes the driver maps / moves in a single DMA transfer. */
#define MY_MAX_BUF_SIZE (327680 * 20)

/* TX/RX channels obtained in probe(), shared with the char-device hooks. */
static struct dma_chan *global_tx_chan;
static struct dma_chan *global_rx_chan;

/* NULL-terminated bounce-buffer arrays allocated in dma_open(). */
static u8 **global_srcs;
static u8 **global_dsts;

/* Fixed a stray ';;' here — an empty declaration is invalid at file scope. */
static unsigned int test_buf_size = MY_MAX_BUF_SIZE;
module_param(test_buf_size, uint, 0444);
MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");

static unsigned int iterations = 5;
module_param(iterations, uint, 0444);
MODULE_PARM_DESC(iterations,
		 "Iterations before stopping test (default: infinite)");

/* Kthread test path handshake: 0 = idle, 1 = TX pending, 2 = RX pending. */
static int is_send_or_recv = 0;
/* Staging buffer for the kthread path.
 * NOTE(review): ~6.4 MB of .bss — consider vmalloc at init; TODO confirm. */
static u8 gexchangebuf[MY_MAX_BUF_SIZE] = {0};
static int send_or_recv_len = 0;
/* Set by the DMA completion callbacks (debug/observability flags). */
static u8 is_dmatest_slave_tx_callback = 0;
static u8 is_dmatest_slave_rx_callback = 0;
/*
* Initialization patterns. All bytes in the source buffer has bit 7
* set, all bytes in the destination buffer has bit 7 cleared.
*
* Bit 6 is set for all bytes which are to be copied by the DMA
* engine. Bit 5 is set for all bytes which are to be overwritten by
* the DMA engine.
*
* The remaining bits are the inverse of a counter which increments by
* one for each byte address.
*/
#define PATTERN_SRC 0x80
#define PATTERN_DST 0x00
#define PATTERN_COPY 0x40
#define PATTERN_OVERWRITE 0x20
#define PATTERN_COUNT_MASK 0x1f
#define XILINX_DMATEST_BD_CNT 1//11
/* Context for one test kthread; each thread drives one TX/RX channel pair. */
struct dmatest_slave_thread {
struct list_head node;		/* link in dmatest_chan.threads */
struct task_struct *task;	/* the kthread itself */
struct dma_chan *tx_chan;	/* memory-to-device channel */
struct dma_chan *rx_chan;	/* device-to-memory channel */
u8 **srcs;			/* NULL-terminated TX bounce buffers */
u8 **dsts;			/* NULL-terminated RX bounce buffers */
enum dma_transaction_type type;	/* always DMA_SLAVE here */
bool done;			/* set just before the thread exits */
};
/* Wrapper tying a DMA channel to the list of threads exercising it. */
struct dmatest_chan {
struct list_head node;		/* link in the global dmatest_channels list */
struct dma_chan *chan;
struct list_head threads;	/* list of dmatest_slave_thread.node */
};
/*
 * These are protected by dma_list_mutex since they're only used by
 * the DMA filter function callback
 */
static DECLARE_WAIT_QUEUE_HEAD(thread_wait);	/* woken when a test thread finishes */
static LIST_HEAD(dmatest_channels);		/* all registered dmatest_chan wrappers */
static unsigned int nr_channels;		/* number of entries in dmatest_channels */
/* Convert a count accumulated over 'runtime' microseconds into a
 * per-second rate. Returns 0 for a non-positive runtime. */
static unsigned long long dmatest_persec(s64 runtime, unsigned int val)
{
	unsigned long long rate = 1000000;

	if (runtime <= 0)
		return 0;

	/* do_div() needs a 32-bit divisor: halve runtime (and double the
	 * scale) until it fits, trading precision for range. */
	while (runtime > UINT_MAX) {
		runtime >>= 1;
		rate <<= 1;
	}

	rate *= val;
	do_div(rate, runtime);
	return rate;
}
/* Throughput in KB/s for 'len' bytes moved in 'runtime' microseconds. */
static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
{
	unsigned long long kib = len >> 10;

	return dmatest_persec(runtime, kib);
}
/* True while at least one thread on either channel has not finished. */
static bool is_threaded_test_run(struct dmatest_chan *tx_dtc,
				 struct dmatest_chan *rx_dtc)
{
	struct dmatest_slave_thread *t;

	list_for_each_entry(t, &tx_dtc->threads, node) {
		if (!t->done)
			return true;
	}
	list_for_each_entry(t, &rx_dtc->threads, node) {
		if (!t->done)
			return true;
	}
	return false;
}
/*
static unsigned long dmatest_random(void)
{
unsigned long buf;
get_random_bytes(&buf, sizeof(buf));
return buf;
}
static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len)
{
unsigned int i;
u8 *buf;
for (; (buf = *bufs); bufs++) {
for (i = 0; i < start; i++)
buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
for ( ; i < start + len; i++)
buf[i] = PATTERN_SRC | PATTERN_COPY
| (~i & PATTERN_COUNT_MASK);
for ( ; i < test_buf_size; i++)
buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
}
}
static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len)
{
unsigned int i;
u8 *buf;
for (; (buf = *bufs); bufs++) {
for (i = 0; i < start; i++)
buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
for ( ; i < start + len; i++)
buf[i] = PATTERN_DST | PATTERN_OVERWRITE
| (~i & PATTERN_COUNT_MASK);
for ( ; i < test_buf_size; i++)
buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
}
}
static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
unsigned int counter, bool is_srcbuf)
{
u8 diff = actual ^ pattern;
u8 expected = pattern | (~counter & PATTERN_COUNT_MASK);
const char *thread_name = current->comm;
if (is_srcbuf)
pr_warn("%s: srcbuf[0x%x] overwritten! Expected %02x, got %02x\n",
thread_name, index, expected, actual);
else if ((pattern & PATTERN_COPY) &&
(diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
pr_warn("%s: dstbuf[0x%x] not copied! Expected %02x, got %02x\n",
thread_name, index, expected, actual);
else if (diff & PATTERN_SRC)
pr_warn("%s: dstbuf[0x%x] was copied! Expected %02x, got %02x\n",
thread_name, index, expected, actual);
else
pr_warn("%s: dstbuf[0x%x] mismatch! Expected %02x, got %02x\n",
thread_name, index, expected, actual);
}
static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
unsigned int end, unsigned int counter,
u8 pattern, bool is_srcbuf)
{
unsigned int i;
unsigned int error_count = 0;
u8 actual;
u8 expected;
u8 *buf;
unsigned int counter_orig = counter;
for (; (buf = *bufs); bufs++) {
counter = counter_orig;
for (i = start; i < end; i++) {
actual = buf[i];
expected = pattern | (~counter & PATTERN_COUNT_MASK);
if (actual != expected) {
if (error_count < 32)
dmatest_mismatch(actual, pattern, i,
counter, is_srcbuf);
error_count++;
}
counter++;
}
}
if (error_count > 32)
pr_warn("%s: %u errors suppressed\n",
current->comm, error_count - 32);
return error_count;
}*/
/* TX descriptor completion callback (runs in the dmaengine's context).
 * Publish the flag BEFORE completing, so a waiter released by
 * wait_for_completion_timeout() is guaranteed to observe it
 * (the original set it after complete(), a small race). */
static void dmatest_slave_tx_callback(void *completion)
{
	is_dmatest_slave_tx_callback = 1;
	complete(completion);
#ifdef MYDEBUG
	printk("dmatest_slave_tx_callback...\n");
#endif
}
/* RX descriptor completion callback — same ordering fix as the TX one:
 * set the observability flag before waking the waiter. */
static void dmatest_slave_rx_callback(void *completion)
{
	is_dmatest_slave_rx_callback = 1;
	complete(completion);
#ifdef MYDEBUG
	printk("dmatest_slave_rx_callback...\n");
#endif
}
/* Function for slave transfers
* Each thread requires 2 channels, one for transmit, and one for receive
*/
static int dmatest_slave_func(void *data)
{
struct dmatest_slave_thread *thread = data;
struct dma_chan *tx_chan;
struct dma_chan *rx_chan;
const char *thread_name;
unsigned int src_off, dst_off, len;
unsigned int error_count;
unsigned int failed_tests = 0;
unsigned int total_tests = 0;
dma_cookie_t tx_cookie;
dma_cookie_t rx_cookie;
enum dma_status status;
enum dma_ctrl_flags flags;
int ret;
int src_cnt;
int dst_cnt;
int bd_cnt = XILINX_DMATEST_BD_CNT;
int i;
ktime_t ktime, start, diff;
ktime_t filltime = 0;
ktime_t comparetime = 0;
s64 runtime = 0;
unsigned long long total_len = 0;
thread_name = current->comm;
ret = -ENOMEM;
/* Ensure that all previous reads are complete */
smp_rmb();
tx_chan = thread->tx_chan;
rx_chan = thread->rx_chan;
/**
//这里为11,实际上是创建11个sg,sg支持多个分块dma传输,这里可以改成1,就一块
*/
dst_cnt = bd_cnt;
src_cnt = bd_cnt;
/**
//申请发送缓冲区数据结构,
*/
thread->srcs = kcalloc(src_cnt + 1, sizeof(u8 *), GFP_KERNEL);
if (!thread->srcs)
goto err_srcs;
/**
//申请发送缓冲区,这里面放发送的数据,往datafifo发送的数据
*/
for (i = 0; i < src_cnt; i++) {
thread->srcs[i] = kmalloc(test_buf_size, GFP_KERNEL);
if (!thread->srcs[i])
goto err_srcbuf;
}
thread->srcs[i] = NULL;
/**
//申请接收缓冲区数据结构,
*/
thread->dsts = kcalloc(dst_cnt + 1, sizeof(u8 *), GFP_KERNEL);
if (!thread->dsts)
goto err_dsts;
/**
//申请接收缓冲区,这里是dma从datafifo搬运的数据放到这里,
*/
for (i = 0; i < dst_cnt; i++) {
thread->dsts[i] = kmalloc(test_buf_size, GFP_KERNEL);
if (!thread->dsts[i])
goto err_dstbuf;
}
thread->dsts[i] = NULL;
set_user_nice(current, 10);
flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
ktime = ktime_get();
while (!kthread_should_stop())
{
struct dma_device *tx_dev = tx_chan->device;
struct dma_device *rx_dev = rx_chan->device;
struct dma_async_tx_descriptor *txd = NULL;
struct dma_async_tx_descriptor *rxd = NULL;
dma_addr_t dma_srcs[XILINX_DMATEST_BD_CNT];
dma_addr_t dma_dsts[XILINX_DMATEST_BD_CNT];
struct completion rx_cmp;
struct completion tx_cmp;
unsigned long rx_tmo = msecs_to_jiffies(300000 / 100); /* RX takes longer */
unsigned long tx_tmo = msecs_to_jiffies(30000 / 100);
u8 align = 0;
/**
//定义发送dma数据结构,
*/
struct scatterlist tx_sg[XILINX_DMATEST_BD_CNT];
/**
//定义接收dma数据结构,
*/
struct scatterlist rx_sg[XILINX_DMATEST_BD_CNT];
//idle time
if(!is_send_or_recv)
{
msleep(1);
continue;
}
/**
//----------------字节对齐,dma传输要求字节对齐
*/
/* honor larger alignment restrictions */
align = tx_dev->copy_align;
if (rx_dev->copy_align > align)
align = rx_dev->copy_align;
if (1 << align > test_buf_size) {
pr_err("%u-byte buffer too small for %d-byte alignment\n",
test_buf_size, 1 << align);
break;
}
len = send_or_recv_len % test_buf_size + 1;
len = (len >> align) << align;
if (!len)
len = 1 << align;
total_len += len;
src_off = send_or_recv_len % (test_buf_size - len + 1);
dst_off = send_or_recv_len % (test_buf_size - len + 1);
src_off = (src_off >> align) << align;
dst_off = (dst_off >> align) << align;
//copy user data
if(1 == is_send_or_recv)
{
//src_cnt == 1,bd_cnt == 1
memcpy(thread->srcs[0] + src_off,gexchangebuf,len);
#ifdef MYDEBUG
printk("len=%d\tsrc_off=%d\tdst_off=%d\n",len,src_off,dst_off);
#endif
for (i = 0; i < src_cnt; i++)
{
u8 *buf = thread->srcs[i] + src_off;
/**
//dma映射后获取的物理地址,实际上dma初始化用的是物理地址,thread->srcs[i] 这个是虚拟地址,是程序可以直接使用的,但是dma用的是物理地址
*/
dma_srcs[i] = dma_map_single(tx_dev->dev, buf, len,
DMA_MEM_TO_DEV);
}
}
if(2 == is_send_or_recv)
{
for (i = 0; i < dst_cnt; i++)
{
/**
//dma映射后获取的物理地址,实际上dma初始化用的是物理地址,thread->srcs[i] 这个是虚拟地址,是程序可以直接使用的,但是dma用的是物理地址
*/
dma_dsts[i] = dma_map_single(rx_dev->dev,
thread->dsts[i],
test_buf_size,
DMA_BIDIRECTIONAL);
//printk("dma_map_single over!\n");
}
}
/**
//初始化发送接收数据结构----------------------
*/
sg_init_table(tx_sg, bd_cnt);
sg_init_table(rx_sg, bd_cnt);
for (i = 0; i < bd_cnt; i++)
{
//物理地址
sg_dma_address(&tx_sg[i]) = dma_srcs[i];
sg_dma_address(&rx_sg[i]) = dma_dsts[i] + dst_off;
//dma一次传输的长度
sg_dma_len(&tx_sg[i]) = len;
sg_dma_len(&rx_sg[i]) = len;
}
//准备发送sg
*/
if(1 == is_send_or_recv)
{
txd = tx_dev->device_prep_slave_sg(tx_chan, tx_sg, bd_cnt,
DMA_MEM_TO_DEV, flags, NULL);
#ifdef MYDEBUG
printk("txd=%d\n",txd);
#endif
if (!txd) {
for (i = 0; i < src_cnt; i++)
dma_unmap_single(tx_dev->dev, dma_srcs[i], len,
DMA_MEM_TO_DEV);
pr_warn("%s: #%u: prep error with src_off=0x%x ",
thread_name, total_tests - 1, src_off);
msleep(5);
continue;
}
/**
//发送的dma,
*/
init_completion(&tx_cmp);
/**
//dma搬运完这个函数会运行
*/
txd->callback = dmatest_slave_tx_callback;
txd->callback_param = &tx_cmp;
tx_cookie = txd->tx_submit(txd);
if (dma_submit_error(tx_cookie)) {
pr_warn("%s: #%u: submit error %d/%d with src_off=0x%x ",
thread_name, total_tests - 1,
rx_cookie, tx_cookie, src_off);
pr_warn("dst_off=0x%x len=0x%x\n",
dst_off, len);
msleep(5);
continue;
}
/**
//这个执行后就相当于给dma控制器发命令了,dma控制器就会根据我们提供的参数来自己搬运数据了
*/
dma_async_issue_pending(tx_chan);
tx_tmo = wait_for_completion_timeout(&tx_cmp, tx_tmo);
/**
//等待发送dma传输完 等completion
*/
status = dma_async_is_tx_complete(tx_chan, tx_cookie,
NULL, NULL);
if (tx_tmo == 0) {
pr_warn("%s: #%u: tx test timed out\n",
thread_name, total_tests - 1);
continue;
} else if (status != DMA_COMPLETE) {
pr_warn("%s: #%u: tx got completion callback, ",
thread_name, total_tests - 1);
pr_warn("but status is \'%s\'\n",
status == DMA_ERROR ? "error" :
"in progress");
continue;
}
is_send_or_recv = 0;
#ifdef MYDEBUG
printk("iverson dma send exec over!\n");
#endif
//dma_unmap_single ?
}
else if(2 == is_send_or_recv)
{
//receive sg
rxd = rx_dev->device_prep_slave_sg(rx_chan, rx_sg, bd_cnt,
DMA_DEV_TO_MEM, flags, NULL);
#ifdef MYDEBUG
printk("rxd=%d\n",rxd);
#endif
if (!rxd) {
for (i = 0; i < dst_cnt; i++)
dma_unmap_single(rx_dev->dev, dma_dsts[i],
test_buf_size,
DMA_BIDIRECTIONAL);
pr_warn("dst_off=0x%x len=0x%x\n",
dst_off, len);
msleep(5);
continue;
}
/**
//接收的dma,先准备接收的
*/
init_completion(&rx_cmp);
/**
//dma搬运完这个函数会运行
*/
rxd->callback = dmatest_slave_rx_callback;
rxd->callback_param = &rx_cmp;
rx_cookie = rxd->tx_submit(rxd);
if (dma_submit_error(rx_cookie)) {
pr_warn("%s: #%u: submit error %d/%d with src_off=0x%x ",
thread_name, total_tests - 1,
rx_cookie, tx_cookie, src_off);
pr_warn("dst_off=0x%x len=0x%x\n",
dst_off, len);
msleep(5);
//failed_tests++;
continue;
}
dma_async_issue_pending(rx_chan);
rx_tmo = wait_for_completion_timeout(&rx_cmp, rx_tmo);
//等待接收传输完
status = dma_async_is_tx_complete(rx_chan, rx_cookie,
NULL, NULL);
if (rx_tmo == 0) {
pr_warn("%s: #%u: rx test timed out\n",
thread_name, total_tests - 1);
continue;
} else if (status != DMA_COMPLETE) {
pr_warn("%s: #%u: rx got completion callback, ",
thread_name, total_tests - 1);
pr_warn("but status is \'%s\'\n",
status == DMA_ERROR ? "error" :
"in progress");
continue;
}
/**
//前面是先映射,这里是解除映射,这样缓冲区的数据才是正确的,不执行这个操作,缓冲区数据不对。
*/
/* Unmap by myself*/
for (i = 0; i < dst_cnt; i++){
/**
* DMA_BIDIRECTIONAL。这个参数表示DMA传输可以是任何一个方向。平台可以保证我们这样做且能够正常工作,但是是以性能为代价的。
* */
dma_unmap_single(rx_dev->dev, dma_dsts[i],
test_buf_size, DMA_BIDIRECTIONAL);
}
//src_cnt == 1,bd_cnt == 1
memcpy(gexchangebuf,thread->dsts[0] + dst_off,send_or_recv_len);
is_send_or_recv = 0;
#ifdef MYDEBUG
printk("dma read exec over!\n");
#endif
}
/*
error_count = 0;
start = ktime_get();
pr_debug("%s: verifying source buffer...\n", thread_name);
//--校验接收的数据和发送的数据,
error_count += dmatest_verify(thread->srcs, 0, src_off,
0, PATTERN_SRC, true);
error_count += dmatest_verify(thread->srcs, src_off,
src_off + len, src_off,
PATTERN_SRC | PATTERN_COPY, true);
error_count += dmatest_verify(thread->srcs, src_off + len,
test_buf_size, src_off + len,
PATTERN_SRC, true);
pr_debug("%s: verifying dest buffer...\n",
thread->task->comm);
error_count += dmatest_verify(thread->dsts, 0, dst_off,
0, PATTERN_DST, false);
error_count += dmatest_verify(thread->dsts, dst_off,
dst_off + len, src_off,
PATTERN_SRC | PATTERN_COPY, false);
error_count += dmatest_verify(thread->dsts, dst_off + len,
test_buf_size, dst_off + len,
PATTERN_DST, false);
diff = ktime_sub(ktime_get(), start);
comparetime = ktime_add(comparetime, diff);
if (error_count) {
pr_warn("%s: #%u: %u errors with ",
thread_name, total_tests - 1, error_count);
pr_warn("src_off=0x%x dst_off=0x%x len=0x%x\n",
src_off, dst_off, len);
failed_tests++;
} else {
pr_debug("%s: #%u: No errors with ",
thread_name, total_tests - 1);
pr_debug("src_off=0x%x dst_off=0x%x len=0x%x\n",
src_off, dst_off, len);
}*/
}
ktime = ktime_sub(ktime_get(), ktime);
ktime = ktime_sub(ktime, comparetime);
ktime = ktime_sub(ktime, filltime);
runtime = ktime_to_us(ktime);
ret = 0;
for (i = 0; thread->dsts[i]; i++)
kfree(thread->dsts[i]);
err_dstbuf:
kfree(thread->dsts);
err_dsts:
for (i = 0; thread->srcs[i]; i++)
kfree(thread->srcs[i]);
err_srcbuf:
kfree(thread->srcs);
err_srcs:
pr_notice("%s: terminating after %u tests, %u failures %llu iops %llu KB/s (status %d)\n",
thread_name, total_tests, failed_tests,
dmatest_persec(runtime, total_tests),
dmatest_KBs(runtime, total_len), ret);
thread->done = true;
wake_up(&thread_wait);
return ret;
}
/* Stop and reap every test kthread attached to this channel wrapper,
 * then free the wrapper itself. */
static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
{
struct dmatest_slave_thread *thread;
struct dmatest_slave_thread *_thread;
int ret;
list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
ret = kthread_stop(thread->task);	/* blocks until the thread exits */
pr_debug("dmatest: thread %s exited with status %d\n",
thread->task->comm, ret);
list_del(&thread->node);
put_task_struct(thread->task);	/* drop the ref taken at thread creation */
kfree(thread);
}
kfree(dtc);
}
static int dmatest_add_slave_threads(struct dmatest_chan *tx_dtc,
struct dmatest_chan *rx_dtc)
{
struct dmatest_slave_thread *thread;
struct dma_chan *tx_chan = tx_dtc->chan;
struct dma_chan *rx_chan = rx_dtc->chan;
int ret;
thread = kzalloc(sizeof(struct dmatest_slave_thread), GFP_KERNEL);
if (!thread) {
pr_warn("dmatest: No memory for slave thread %s-%s\n",
dma_chan_name(tx_chan), dma_chan_name(rx_chan));
}
thread->tx_chan = tx_chan;
thread->rx_chan = rx_chan;
thread->type = (enum dma_transaction_type)DMA_SLAVE;
/* Ensure that all previous writes are complete */
smp_wmb();
thread->task = kthread_run(dmatest_slave_func, thread, "%s-%s",
dma_chan_name(tx_chan),
dma_chan_name(rx_chan));
ret = PTR_ERR(thread->task);
if (IS_ERR(thread->task)) {
pr_warn("dmatest: Failed to run thread %s-%s\n",
dma_chan_name(tx_chan), dma_chan_name(rx_chan));
kfree(thread);
return ret;
}
/* srcbuf and dstbuf are allocated by the thread itself */
get_task_struct(thread->task);
list_add_tail(&thread->node, &tx_dtc->threads);
/* Added one thread with 2 channels */
return 1;
}
/* Wrap the TX/RX channels in dmatest_chan structures, start the test
 * thread, and register both wrappers on the global channel list.
 * Returns 0 on success or a negative errno. */
static int dmatest_add_slave_channels(struct dma_chan *tx_chan,
				      struct dma_chan *rx_chan)
{
	struct dmatest_chan *tx_dtc;
	struct dmatest_chan *rx_dtc;
	unsigned int thread_count = 0;

	tx_dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
	if (!tx_dtc) {
		pr_warn("dmatest: No memory for tx %s\n",
			dma_chan_name(tx_chan));
		return -ENOMEM;
	}

	rx_dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
	if (!rx_dtc) {
		pr_warn("dmatest: No memory for rx %s\n",
			dma_chan_name(rx_chan));
		kfree(tx_dtc);	/* BUG fix: tx_dtc leaked on this path */
		return -ENOMEM;
	}

	tx_dtc->chan = tx_chan;
	rx_dtc->chan = rx_chan;
	INIT_LIST_HEAD(&tx_dtc->threads);
	INIT_LIST_HEAD(&rx_dtc->threads);

	dmatest_add_slave_threads(tx_dtc, rx_dtc);
	thread_count += 1;

	pr_info("dmatest: Started %u threads using %s %s\n",
		thread_count, dma_chan_name(tx_chan), dma_chan_name(rx_chan));

	list_add_tail(&tx_dtc->node, &dmatest_channels);
	list_add_tail(&rx_dtc->node, &dmatest_channels);
	nr_channels += 2;

	/* Optionally block until the test threads finish. */
	// if (iterations)
	//	wait_event(thread_wait, !is_threaded_test_run(tx_dtc, rx_dtc));
	return 0;
}
/* Bind to the "xlnx,axi-dma-test-1.00.a" DT node and grab its two DMA
 * channels ("axidma0" = TX, "axidma1" = RX per the dma-names property).
 * The channels are stashed in globals for the char-device hooks. */
static int xilinx_axidmatest_probe(struct platform_device *pdev)
{
/*
struct dma_chan *chan, *rx_chan;
*/
int err;
global_tx_chan = dma_request_chan(&pdev->dev, "axidma0");
if (IS_ERR(global_tx_chan)) {
err = PTR_ERR(global_tx_chan);
if (err != -EPROBE_DEFER)
pr_err("xilinx_dmatest: No Tx channel\n");
return err;
}
global_rx_chan = dma_request_chan(&pdev->dev, "axidma1");
if (IS_ERR(global_rx_chan)) {
err = PTR_ERR(global_rx_chan);
if (err != -EPROBE_DEFER)
pr_err("xilinx_dmatest: No Rx channel\n");
goto free_tx;
}
/*
//Add the transmit and receive channels (kthread test path, disabled)
err = dmatest_add_slave_channels(chan, rx_chan);
if (err) {
pr_err("xilinx_dmatest: Unable to add channels\n");
goto free_rx;
}*/
return 0;
/* NOTE(review): free_rx is only reachable from the commented-out code
 * above, so the compiler will warn about an unused label. */
free_rx:
dma_release_channel(global_rx_chan);
free_tx:
dma_release_channel(global_tx_chan);
return err;
}
/* Tear down every registered kthread test channel.
 * NOTE(review): with dmatest_add_slave_channels() commented out in probe,
 * the list is empty, and global_tx_chan/global_rx_chan are never released
 * here — TODO confirm that is intended. */
static int xilinx_axidmatest_remove(struct platform_device *pdev)
{
struct dmatest_chan *dtc, *_dtc;
struct dma_chan *chan;
list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) {
list_del(&dtc->node);
chan = dtc->chan;
dmatest_cleanup_channel(dtc);	/* stops and frees the threads first */
pr_info("xilinx_dmatest: dropped channel %s\n",
dma_chan_name(chan));
dmaengine_terminate_all(chan);
dma_release_channel(chan);
}
return 0;
}
/* Char-device ("/dev/ps_dma_dev") plumbing. */
#define DMA_LENGTH MY_MAX_BUF_SIZE
static struct device *mydevice = NULL;
static int major;	/* dynamically allocated char-device major number */
static struct class *dma_class = NULL;
static int dma_init(void);
static void dma_exit(void);
static int dma_open(struct inode *inode,struct file *file);
static int dma_release(struct inode *inode,struct file *file);
static ssize_t dma_write(struct file *file,const char __user *buf, size_t count,loff_t *ppos);
static ssize_t dma_read(struct file *file,char __user *buf,size_t size,loff_t *ppos);
/* file_operations: the bridge between the VFS and this driver. */
static struct file_operations dma_lops=
{
.owner = THIS_MODULE,
.read = dma_read,
.open = dma_open,
.write = dma_write,
.release = dma_release,
};
/*
 * Register the char device, device class and "/dev/ps_dma_dev" node.
 * Called from module init. Returns 0 on success or a negative errno;
 * the original version ignored every failure and always returned 0.
 */
static int dma_init(void)
{
	major = register_chrdev(0, "ps_dma_dev", &dma_lops);
	if (major < 0)
		return major;

	dma_class = class_create(THIS_MODULE, "ps_dma_dev");
	if (IS_ERR(dma_class)) {
		unregister_chrdev(major, "ps_dma_dev");
		return PTR_ERR(dma_class);
	}

	mydevice = device_create(dma_class, NULL, MKDEV(major, 0), NULL,
				 "ps_dma_dev");
	if (IS_ERR(mydevice)) {
		class_destroy(dma_class);
		unregister_chrdev(major, "ps_dma_dev");
		return PTR_ERR(mydevice);
	}

#ifdef MYDEBUG
	printk("major dev number= %d\n", major);
#endif
	return 0;
}
/* Module-exit teardown, in reverse order of creation: device node,
 * then class, then the char device itself. (The original unregistered
 * the chrdev first, while the device node still referenced it.) */
static void dma_exit(void)
{
	device_destroy(dma_class, MKDEV(major, 0));
	class_destroy(dma_class);
	unregister_chrdev(major, "ps_dma_dev");
}
//open 接口函数
static int dma_open(struct inode *inode,struct file *file)
{
int i;
int src_cnt;
int dst_cnt;
int bd_cnt = XILINX_DMATEST_BD_CNT;
//这里为11,实际上是创建11个sg,sg支持多个分块dma传输,这里可以改成1,就一块
dst_cnt = bd_cnt;
src_cnt = bd_cnt;
#ifdef MYDEBUG
printk(" global_srcs = kcalloc\n");
#endif
//申请发送缓冲区数据结构,
global_srcs = kcalloc(src_cnt + 1, sizeof(u8 *), GFP_KERNEL);
if (!global_srcs)
return -1;
#ifdef MYDEBUG
printk(" global_srcs[i] = kmalloc\n");
#endif
//申请发送缓冲区,这里面放发送的数据,往datafifo发送的数据
for (i = 0; i < src_cnt; i++) {
global_srcs[i] = kmalloc(test_buf_size, GFP_KERNEL);
if (!global_srcs[i])
return -1;
}
global_srcs[i] = NULL;
#ifdef MYDEBUG
printk(" global_dsts = kcalloc\n");
#endif
//申请接收缓冲区数据结构,
global_dsts = kcalloc(dst_cnt + 1, sizeof(u8 *), GFP_KERNEL);
if (!global_dsts)
return -1;
#ifdef MYDEBUG
printk(" global_dsts[i] = kmalloc\n");
#endif
for (i = 0; i < dst_cnt; i++) {
global_dsts[i] = kmalloc(test_buf_size, GFP_KERNEL);
if (!global_dsts[i])
return -1;
}
global_dsts[i] = NULL;
#ifdef MYDEBUG
printk("DMA open\n");
#endif
return 0;
}
//release 接口函数
static int dma_release(struct inode *inode,struct file *file)
{
#ifdef MYDEBUG
printk("DMA release\n");
#endif
int i;
for (i = 0; global_dsts[i]; i++)
kfree(global_dsts[i]);
kfree(global_dsts);
for (i = 0; global_srcs[i]; i++)
kfree(global_srcs[i]);
kfree(global_srcs);
return 0;
}
/* write(2) handler: stage up to DMA_LENGTH bytes from user space and push
 * them through the MM2S (memory-to-device) channel.
 * Returns 'count' on success, 0 on any DMA failure (original behavior),
 * or -EFAULT on a bad user pointer.
 * NOTE(review): the transfer length is derived as (count % test_buf_size
 * + 1) rounded down to the alignment — kept from the original test logic;
 * confirm callers always pass count < test_buf_size. */
static ssize_t dma_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	struct dma_device *tx_dev = global_tx_chan->device;
	struct dma_device *rx_dev = global_rx_chan->device;
	struct dma_async_tx_descriptor *txd = NULL;
	dma_addr_t dma_srcs[XILINX_DMATEST_BD_CNT];
	struct scatterlist tx_sg[XILINX_DMATEST_BD_CNT];
	struct completion tx_cmp;
	unsigned long tx_tmo = msecs_to_jiffies(30000 / 100);
	u8 align = 0;
	int i;
	int src_cnt = XILINX_DMATEST_BD_CNT;
	int bd_cnt = XILINX_DMATEST_BD_CNT;
	unsigned int src_off, len;
	enum dma_status status;
	enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
	dma_cookie_t tx_cookie;

	if (count > DMA_LENGTH)
		return 0;

	/* honor larger alignment restrictions */
	align = tx_dev->copy_align;
	if (rx_dev->copy_align > align)
		align = rx_dev->copy_align;
	if (1 << align > test_buf_size)
		return 0;

	len = count % test_buf_size + 1;
	len = (len >> align) << align;
	if (!len)
		len = 1 << align;
	src_off = count % (test_buf_size - len + 1);
	src_off = (src_off >> align) << align;

	/* BUG fix: 'buf' is a user pointer, so memcpy() is not safe here.
	 * Copy at most 'count' bytes so we never read past the user buffer
	 * (len can be count + 1 before alignment). */
	if (copy_from_user(global_srcs[0] + src_off, buf,
			   min_t(size_t, len, count)))
		return -EFAULT;

	/* Map for device reads: the engine needs the bus address;
	 * global_srcs[i] is only a CPU virtual address. */
	for (i = 0; i < src_cnt; i++) {
		u8 *tmp_buf = global_srcs[i] + src_off;

		dma_srcs[i] = dma_map_single(tx_dev->dev, tmp_buf, len,
					     DMA_MEM_TO_DEV);
	}

	sg_init_table(tx_sg, bd_cnt);
	for (i = 0; i < bd_cnt; i++) {
		sg_dma_address(&tx_sg[i]) = dma_srcs[i];
		sg_dma_len(&tx_sg[i]) = len;
	}

	txd = tx_dev->device_prep_slave_sg(global_tx_chan, tx_sg, bd_cnt,
					   DMA_MEM_TO_DEV, flags, NULL);
	if (!txd)
		goto err_unmap;

	/* The callback completes tx_cmp once the engine has moved the data. */
	init_completion(&tx_cmp);
	txd->callback = dmatest_slave_tx_callback;
	txd->callback_param = &tx_cmp;
	tx_cookie = txd->tx_submit(txd);
	if (dma_submit_error(tx_cookie))
		goto err_unmap;

	/* Kick the controller and wait for its completion interrupt. */
	dma_async_issue_pending(global_tx_chan);
	tx_tmo = wait_for_completion_timeout(&tx_cmp, tx_tmo);
	status = dma_async_is_tx_complete(global_tx_chan, tx_cookie,
					  NULL, NULL);

	/* Always unmap — the original leaked the mapping on timeout/error. */
	for (i = 0; i < src_cnt; i++)
		dma_unmap_single(tx_dev->dev, dma_srcs[i], len,
				 DMA_MEM_TO_DEV);

	if (tx_tmo == 0) {
#ifdef MYDEBUG
		printk("tx test timed out\n");
#endif
		return 0;
	}
	if (status != DMA_COMPLETE)
		return 0;

#ifdef MYDEBUG
	printk("dma write ok!\n");
#endif
	return count;

err_unmap:
	for (i = 0; i < src_cnt; i++)
		dma_unmap_single(tx_dev->dev, dma_srcs[i], len,
				 DMA_MEM_TO_DEV);
	return 0;
}
/*
 * read(2) handler: pull data in through the S2MM (device-to-memory)
 * channel and copy it to user space.
 * The DMA engine moves data in 32-bit words.
 * Returns 'size' on success, 0 on any DMA failure (original behavior),
 * or -EFAULT on a bad user pointer.
 */
static ssize_t dma_read(struct file *file, char __user *buf, size_t size,
			loff_t *ppos)
{
	struct dma_device *tx_dev = global_tx_chan->device;
	struct dma_device *rx_dev = global_rx_chan->device;
	struct dma_async_tx_descriptor *rxd = NULL;
	dma_addr_t dma_dsts[XILINX_DMATEST_BD_CNT];
	struct scatterlist rx_sg[XILINX_DMATEST_BD_CNT];
	struct completion rx_cmp;
	unsigned long rx_tmo = msecs_to_jiffies(300000 / 100); /* RX takes longer */
	u8 align = 0;
	int i;
	int dst_cnt = XILINX_DMATEST_BD_CNT;
	int bd_cnt = XILINX_DMATEST_BD_CNT;
	unsigned int dst_off, len;
	size_t copy_len;
	enum dma_status status;
	enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
	dma_cookie_t rx_cookie;

	if (size > DMA_LENGTH)
		return 0;

	/* honor larger alignment restrictions */
	align = tx_dev->copy_align;
	if (rx_dev->copy_align > align)
		align = rx_dev->copy_align;
	if (1 << align > test_buf_size)
		return 0;

	len = size % test_buf_size + 1;
	len = (len >> align) << align;
	if (!len)
		len = 1 << align;
	dst_off = size % (test_buf_size - len + 1);
	dst_off = (dst_off >> align) << align;

	/* Map the full destination buffers for the device to write into;
	 * global_dsts[i] is only a CPU virtual address. */
	for (i = 0; i < dst_cnt; i++)
		dma_dsts[i] = dma_map_single(rx_dev->dev, global_dsts[i],
					     test_buf_size,
					     DMA_BIDIRECTIONAL);

	sg_init_table(rx_sg, bd_cnt);
	for (i = 0; i < bd_cnt; i++) {
		sg_dma_address(&rx_sg[i]) = dma_dsts[i] + dst_off;
		sg_dma_len(&rx_sg[i]) = len;
	}

	rxd = rx_dev->device_prep_slave_sg(global_rx_chan, rx_sg, bd_cnt,
					   DMA_DEV_TO_MEM, flags, NULL);
	if (!rxd)
		goto err_unmap;

	/* The callback completes rx_cmp once the engine has landed the data. */
	init_completion(&rx_cmp);
	rxd->callback = dmatest_slave_rx_callback;
	rxd->callback_param = &rx_cmp;
	rx_cookie = rxd->tx_submit(rxd);
	if (dma_submit_error(rx_cookie))
		goto err_unmap;

	dma_async_issue_pending(global_rx_chan);
	rx_tmo = wait_for_completion_timeout(&rx_cmp, rx_tmo);
	status = dma_async_is_tx_complete(global_rx_chan, rx_cookie,
					  NULL, NULL);

	/* Unmap on every path (original leaked the mapping on timeout).
	 * Unmapping also hands ownership back to the CPU so the buffer
	 * contents are coherent before we read them. */
	for (i = 0; i < dst_cnt; i++)
		dma_unmap_single(rx_dev->dev, dma_dsts[i], test_buf_size,
				 DMA_BIDIRECTIONAL);

	if (rx_tmo == 0) {
#ifdef MYDEBUG
		printk("rx test timed out\n");
#endif
		return 0;
	}
	if (status != DMA_COMPLETE)
		return 0;

	/* Clamp so we never read past the end of the bounce buffer —
	 * dst_off + size could exceed test_buf_size in the original. */
	copy_len = size;
	if (copy_len > test_buf_size - dst_off)
		copy_len = test_buf_size - dst_off;
	if (copy_to_user(buf, global_dsts[0] + dst_off, copy_len))
		return -EFAULT;

#ifdef MYDEBUG
	printk("\ndma read ok!\n");
#endif
	return size;

err_unmap:
	for (i = 0; i < dst_cnt; i++)
		dma_unmap_single(rx_dev->dev, dma_dsts[i], test_buf_size,
				 DMA_BIDIRECTIONAL);
	return 0;
}
/
static const struct of_device_id xilinx_axidmatest_of_ids[] = {
{ .compatible = "xlnx,axi-dma-test-1.00.a",},
{}
};
/* Platform-driver glue: probe/remove run when a DT node matches the table. */
static struct platform_driver xilinx_axidmatest_driver = {
.driver = {
.name = "xilinx_axidmatest",
.of_match_table = xilinx_axidmatest_of_ids,
},
.probe = xilinx_axidmatest_probe,
.remove = xilinx_axidmatest_remove,
};
/* Module entry: create the char device, then register the platform driver.
 * late_initcall so the DMA engine drivers are already bound.
 * The original ignored dma_init()'s return value and never rolled back. */
static int __init axidma_init(void)
{
	int ret;

	ret = dma_init();
	if (ret)
		return ret;

	ret = platform_driver_register(&xilinx_axidmatest_driver);
	if (ret)
		dma_exit();
	return ret;
}
late_initcall(axidma_init);
static void __exit axidma_exit(void)
{
platform_driver_unregister(&xilinx_axidmatest_driver);
dma_exit();
}
module_exit(axidma_exit)
MODULE_AUTHOR("Xilinx, Inc.");
MODULE_DESCRIPTION("Xilinx AXI DMA Test Client");
MODULE_LICENSE("GPL v2");
3. User-space test program
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <unistd.h>
/* Crude busy-wait delay: roughly 2e8 empty loop iterations. */
void delay(void)
{
	int outer;
	int inner;

	for (outer = 0; outer < 20000; outer++) {
		for (inner = 0; inner < 10000; inner++) {
			/* spin */
		}
	}
}
/* Receive buffer: one 1024x768 4-byte-per-pixel frame plus 4 spare bytes. */
unsigned char rxbuf[1024 * 768 * 4 + 4] = {0};

/* Open /dev/ps_dma_dev and repeatedly read(2) DMA data, printing the
 * per-read latency and the first 24 bytes as big-endian 32-bit words.
 * Requires <string.h> (memset) and <sys/time.h> (gettimeofday) — the
 * original relied on implicit declarations. */
int main(int argc, char **argv)
{
	int fd;
	int readcnt;
	struct timeval start, end;

	printf("main start...\n");
	fd = open("/dev/ps_dma_dev", O_RDWR);
	if (fd < 0) {
		printf("can not open file\n");
		return -1;
	}
	printf("open file sucuss\n");

	delay();

	/* Read loop: runs until killed. */
	while (1) {
		memset(rxbuf, 0, sizeof(rxbuf));
		gettimeofday(&start, NULL);
		readcnt = read(fd, rxbuf, sizeof(rxbuf));
		gettimeofday(&end, NULL);
		if (readcnt > 0) {
			/* Elapsed time in microseconds. */
			long long total_time =
				(end.tv_sec - start.tv_sec) * 1000000 +
				(end.tv_usec - start.tv_usec);
			printf("readcnt=%d\ttime=%lld ms\n", readcnt,
			       total_time / 1000);
			printf("bytes[0--23]=%08x\t%08x\t%08x\t%08x\t%08x\t%08x\n",
			       rxbuf[0] << 24 | rxbuf[1] << 16 |
			       rxbuf[2] << 8 | rxbuf[3],
			       rxbuf[4] << 24 | rxbuf[5] << 16 |
			       rxbuf[6] << 8 | rxbuf[7],
			       rxbuf[8] << 24 | rxbuf[9] << 16 |
			       rxbuf[10] << 8 | rxbuf[11],
			       rxbuf[12] << 24 | rxbuf[13] << 16 |
			       rxbuf[14] << 8 | rxbuf[15],
			       rxbuf[16] << 24 | rxbuf[17] << 16 |
			       rxbuf[18] << 8 | rxbuf[19],
			       rxbuf[20] << 24 | rxbuf[21] << 16 |
			       rxbuf[22] << 8 | rxbuf[23]);
		}
	}
	return 0;
}