DMA Transfers Between PS and PL on the Xilinx Platform

1. pl.dtsi

/*
 * CAUTION: This file is automatically generated by Xilinx.
 */


/ {
	amba_pl: amba_pl@0 {
		#address-cells = <2>;
		#size-cells = <2>;
		compatible = "simple-bus";
		ranges ;
		axi_dma_0: dma@80040000 {
			#dma-cells = <1>;
			clock-names = "s_axi_lite_aclk", "m_axi_sg_aclk", "m_axi_mm2s_aclk", "m_axi_s2mm_aclk";
			clocks = <&zynqmp_clk 71>, <&zynqmp_clk 71>, <&zynqmp_clk 71>, <&zynqmp_clk 71>;
			compatible = "xlnx,axi-dma-7.1", "xlnx,axi-dma-1.00.a";
			interrupt-names = "mm2s_introut", "s2mm_introut";
			interrupt-parent = <&gic>;
			interrupts = <0 92 4 0 93 4>;
			reg = <0x0 0x80040000 0x0 0x10000>;
			xlnx,addrwidth = <0x20>;
			xlnx,include-sg ;
			xlnx,sg-length-width = <0x14>;
			dma-channel@80040000 {
				compatible = "xlnx,axi-dma-mm2s-channel";
				dma-channels = <0x1>;
				interrupts = <0 92 4>;
				xlnx,datawidth = <0x20>;
				xlnx,device-id = <0x0>;
			};
			dma-channel@80040030 {
				compatible = "xlnx,axi-dma-s2mm-channel";
				dma-channels = <0x1>;
				interrupts = <0 93 4>;
				xlnx,datawidth = <0x20>;
				xlnx,device-id = <0x0>;
			};
		};
		axi_dynclk_0: axi_dynclk@80010000 {
			clock-names = "REF_CLK_I", "s00_axi_aclk";
			clocks = <&zynqmp_clk 71>, <&zynqmp_clk 71>;
			compatible = "xlnx,axi-dynclk-1.0";
			reg = <0x0 0x80010000 0x0 0x10000>;
			xlnx,s00-axi-addr-width = <0x5>;
			xlnx,s00-axi-data-width = <0x20>;
		};
		axi_vdma_0: dma@80020000 {
			#dma-cells = <1>;
			clock-names = "s_axi_lite_aclk", "m_axi_mm2s_aclk", "m_axis_mm2s_aclk";
			clocks = <&zynqmp_clk 71>, <&zynqmp_clk 72>, <&zynqmp_clk 72>;
			compatible = "xlnx,axi-vdma-6.3", "xlnx,axi-vdma-1.00.a";
			interrupt-names = "mm2s_introut";
			interrupt-parent = <&gic>;
			interrupts = <0 90 4>;
			reg = <0x0 0x80020000 0x0 0x10000>;
			xlnx,addrwidth = <0x40>;
			xlnx,flush-fsync = <0x1>;
			xlnx,num-fstores = <0x3>;
			dma-channel@80020000 {
				compatible = "xlnx,axi-vdma-mm2s-channel";
				interrupts = <0 90 4>;
				xlnx,datawidth = <0x18>;
				xlnx,device-id = <0x0>;
				xlnx,include-dre ;
			};
		};
		v_tc_0: v_tc@80030000 {
			clock-names = "clk", "s_axi_aclk";
			clocks = <&misc_clk_0>, <&zynqmp_clk 71>;
			compatible = "xlnx,v-tc-6.2", "xlnx,v-tc-6.1";
			interrupt-names = "irq";
			interrupt-parent = <&gic>;
			interrupts = <0 89 4>;
			reg = <0x0 0x80030000 0x0 0x10000>;
			xlnx,generator ;
		};
		misc_clk_0: misc_clk_0 {
			#clock-cells = <0>;
			clock-frequency = <100000000>;
			compatible = "fixed-clock";
		};
	};
};
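
A few properties of the axi_dma_0 node above are worth decoding: xlnx,include-sg enables the scatter-gather engine, xlnx,sg-length-width = <0x14> gives a 20-bit length field per descriptor, and each interrupt triple such as <0 92 4> means shared peripheral interrupt (SPI) number 92 with a level-high trigger.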

system-user.dtsi

#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/pinctrl/pinctrl-zynqmp.h>

/include/ "system-conf.dtsi"
/ {
   model = "Zynq MpSoc Development Board";
   
   chosen {
   bootargs = "earlycon console=ttyPS0,115200 clk_ignore_unused root=/dev/mmcblk1p2 rw rootwait";
   stdout-path = "serial0:115200n8";
   };
   
};


&amba_pl {
      axidmatest_0: axidmatest@0 {
              compatible ="xlnx,axi-dma-test-1.00.a";
              dmas = <&axi_dma_0 0 &axi_dma_0 1>;
              dma-names = "axidma0","axidma1";
      };    
};


/* SD */
&sdhci1 {
	disable-wp;
	no-1-8-v;
};

/* USB */
&dwc3_0 {
	status = "okay";
	dr_mode = "host";
};

&amba {
	zyxclmm_drm {
		compatible = "xlnx,zocl";
		status = "okay";
	};
};

&i2c0 {
	status = "okay";
	clock-frequency = <100000>;

	xilinx_an071@5D {	
		compatible = "goodix,gt9xx";
		reg = <0x5D>;
		interrupt-parent = <&gic>;
		interrupts = <0 91 4>;
	};
};
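
For reference, the dmas/dma-names properties of the axidmatest node above are what the kernel driver resolves by name at probe time. A minimal sketch of that lookup (error handling trimmed; the channel names match the node above):

#include <linux/dmaengine.h>

static int request_axidma_channels(struct device *dev,
				   struct dma_chan **tx, struct dma_chan **rx)
{
	*tx = dma_request_chan(dev, "axidma0");	/* MM2S: memory -> PL */
	if (IS_ERR(*tx))
		return PTR_ERR(*tx);

	*rx = dma_request_chan(dev, "axidma1");	/* S2MM: PL -> memory */
	if (IS_ERR(*rx)) {
		dma_release_channel(*tx);
		return PTR_ERR(*rx);
	}
	return 0;
}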



2. Kernel driver (modified from axidmatest.c)

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * XILINX AXI DMA and MCDMA Engine test module
 *
 * Copyright (C) 2010 Xilinx, Inc. All rights reserved.
 *
 * Based on Atmel DMA Test Client
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/of_dma.h>
#include <linux/platform_device.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/sched/task.h>
#include <linux/dma/xilinx_dma.h>


#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <asm/io.h>
#include <linux/platform_device.h>
#include <linux/miscdevice.h>
#include <linux/ioport.h>
#include <linux/of.h>
#include <linux/uaccess.h>
#include <asm/uaccess.h>
#include <linux/dma-mapping.h>


//#define MYDEBUG 

/* maximum DMA mapping size, i.e. the largest single transfer */
#define MY_MAX_BUF_SIZE (327680 * 20)

static struct dma_chan *global_tx_chan;
static struct dma_chan *global_rx_chan;

static u8 **global_srcs;
static u8 **global_dsts;

static unsigned int test_buf_size = MY_MAX_BUF_SIZE;
module_param(test_buf_size, uint, 0444);
MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");

static unsigned int iterations = 5;
module_param(iterations, uint, 0444);
MODULE_PARM_DESC(iterations,
		 "Iterations before stopping test (default: 5)");


/* 0 = idle, 1 = TX (send) pending, 2 = RX (receive) pending */
static int is_send_or_recv = 0;
static u8 gexchangebuf[MY_MAX_BUF_SIZE] = {0};

static int send_or_recv_len = 0;

static u8 is_dmatest_slave_tx_callback = 0;
static u8 is_dmatest_slave_rx_callback = 0;


/*
 * Initialization patterns. All bytes in the source buffer has bit 7
 * set, all bytes in the destination buffer has bit 7 cleared.
 *
 * Bit 6 is set for all bytes which are to be copied by the DMA
 * engine. Bit 5 is set for all bytes which are to be overwritten by
 * the DMA engine.
 *
 * The remaining bits are the inverse of a counter which increments by
 * one for each byte address.
 */
#define PATTERN_SRC		0x80
#define PATTERN_DST		0x00
#define PATTERN_COPY		0x40
#define PATTERN_OVERWRITE	0x20
#define PATTERN_COUNT_MASK	0x1f

#define XILINX_DMATEST_BD_CNT	1	/* originally 11 */

struct dmatest_slave_thread {
	struct list_head node;
	struct task_struct *task;
	struct dma_chan *tx_chan;
	struct dma_chan *rx_chan;
	u8 **srcs;
	u8 **dsts;
	enum dma_transaction_type type;
	bool done;
};

struct dmatest_chan {
	struct list_head node;
	struct dma_chan *chan;
	struct list_head threads;
};

/*
 * These are protected by dma_list_mutex since they're only used by
 * the DMA filter function callback
 */
static DECLARE_WAIT_QUEUE_HEAD(thread_wait);
static LIST_HEAD(dmatest_channels);
static unsigned int nr_channels;

static unsigned long long dmatest_persec(s64 runtime, unsigned int val)
{
	unsigned long long per_sec = 1000000;

	if (runtime <= 0)
		return 0;

	/* drop precision until runtime is 32-bits */
	while (runtime > UINT_MAX) {
		runtime >>= 1;
		per_sec <<= 1;
	}

	per_sec *= val;
	do_div(per_sec, runtime);
	return per_sec;
}

static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
{
	return dmatest_persec(runtime, len >> 10);
}

static bool is_threaded_test_run(struct dmatest_chan *tx_dtc,
				 struct dmatest_chan *rx_dtc)
{
	struct dmatest_slave_thread *thread;
	int ret = false;

	list_for_each_entry(thread, &tx_dtc->threads, node) {
		if (!thread->done)
			ret = true;
	}

	list_for_each_entry(thread, &rx_dtc->threads, node) {
		if (!thread->done)
			ret = true;
	}
	return ret;
}

/*
static unsigned long dmatest_random(void)
{
	unsigned long buf;

	get_random_bytes(&buf, sizeof(buf));
	return buf;
}

static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len)
{
	unsigned int i;
	u8 *buf;

	for (; (buf = *bufs); bufs++) {
		for (i = 0; i < start; i++)
			buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
		for ( ; i < start + len; i++)
			buf[i] = PATTERN_SRC | PATTERN_COPY
				| (~i & PATTERN_COUNT_MASK);
		for ( ; i < test_buf_size; i++)
			buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
	}
}

static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len)
{
	unsigned int i;
	u8 *buf;

	for (; (buf = *bufs); bufs++) {
		for (i = 0; i < start; i++)
			buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
		for ( ; i < start + len; i++)
			buf[i] = PATTERN_DST | PATTERN_OVERWRITE
				| (~i & PATTERN_COUNT_MASK);
		for ( ; i < test_buf_size; i++)
			buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
	}
}


static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
			     unsigned int counter, bool is_srcbuf)
{
	u8 diff = actual ^ pattern;
	u8 expected = pattern | (~counter & PATTERN_COUNT_MASK);
	const char *thread_name = current->comm;

	if (is_srcbuf)
		pr_warn("%s: srcbuf[0x%x] overwritten! Expected %02x, got %02x\n",
			thread_name, index, expected, actual);
	else if ((pattern & PATTERN_COPY) &&
		 (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
		pr_warn("%s: dstbuf[0x%x] not copied! Expected %02x, got %02x\n",
			thread_name, index, expected, actual);
	else if (diff & PATTERN_SRC)
		pr_warn("%s: dstbuf[0x%x] was copied! Expected %02x, got %02x\n",
			thread_name, index, expected, actual);
	else
		pr_warn("%s: dstbuf[0x%x] mismatch! Expected %02x, got %02x\n",
			thread_name, index, expected, actual);
}


static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
				   unsigned int end, unsigned int counter,
				   u8 pattern, bool is_srcbuf)
{
	unsigned int i;
	unsigned int error_count = 0;
	u8 actual;
	u8 expected;
	u8 *buf;
	unsigned int counter_orig = counter;

	for (; (buf = *bufs); bufs++) {
		counter = counter_orig;
		for (i = start; i < end; i++) {
			actual = buf[i];
			expected = pattern | (~counter & PATTERN_COUNT_MASK);
			if (actual != expected) {
				if (error_count < 32)
					dmatest_mismatch(actual, pattern, i,
							 counter, is_srcbuf);
				error_count++;
			}
			counter++;
		}
	}

	if (error_count > 32)
		pr_warn("%s: %u errors suppressed\n",
			current->comm, error_count - 32);

	return error_count;
}*/

static void dmatest_slave_tx_callback(void *completion)
{
	complete(completion);

	is_dmatest_slave_tx_callback = 1;

#ifdef MYDEBUG
	printk("dmatest_slave_tx_callback...\n");
#endif
}

static void dmatest_slave_rx_callback(void *completion)
{
	complete(completion);

	is_dmatest_slave_rx_callback = 1;

#ifdef MYDEBUG
	printk("dmatest_slave_rx_callback...\n");
#endif
}
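
/*
 * The canonical dmaengine slave flow used throughout this driver:
 *   1. dma_map_single()              - hand the buffer over to the device
 *   2. device_prep_slave_sg()        - build a transfer descriptor
 *   3. txd->tx_submit()              - queue it, returns a cookie
 *   4. dma_async_issue_pending()     - start the hardware
 *   5. wait_for_completion_timeout() - wait for the callback
 *      + dma_async_is_tx_complete()  - check the cookie status
 *   6. dma_unmap_single()            - hand the buffer back to the CPU
 */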



/* Function for slave transfers
 * Each thread requires 2 channels, one for transmit, and one for receive
 */
static int dmatest_slave_func(void *data)
{
	struct dmatest_slave_thread	*thread = data;
	struct dma_chan *tx_chan;
	struct dma_chan *rx_chan;
	const char *thread_name;
	unsigned int src_off, dst_off, len;
	unsigned int error_count;
	unsigned int failed_tests = 0;
	unsigned int total_tests  = 0;
	dma_cookie_t tx_cookie;
	dma_cookie_t rx_cookie;
	enum dma_status status;
	enum dma_ctrl_flags flags;
	int ret;
	int src_cnt;
	int dst_cnt;
	int bd_cnt = XILINX_DMATEST_BD_CNT;
	int i;

	ktime_t	ktime, start, diff;
	ktime_t	filltime = 0;
	ktime_t	comparetime = 0;
	s64 runtime = 0;
	unsigned long long total_len = 0;
	thread_name = current->comm;
	ret = -ENOMEM;


	/* Ensure that all previous reads are complete */
	smp_rmb();
	tx_chan = thread->tx_chan;
	rx_chan = thread->rx_chan;

	/*
	 * XILINX_DMATEST_BD_CNT was 11 in the original test (11 SG
	 * segments for a multi-segment transfer); it is 1 here.
	 */
	dst_cnt = bd_cnt;
	src_cnt = bd_cnt;

	/* allocate the array of TX buffer pointers */
	thread->srcs = kcalloc(src_cnt + 1, sizeof(u8 *), GFP_KERNEL);
	if (!thread->srcs)
		goto err_srcs;

	/* allocate the TX buffers; data headed for the AXIS data FIFO is staged here */
	for (i = 0; i < src_cnt; i++) {
		thread->srcs[i] = kmalloc(test_buf_size, GFP_KERNEL);
		if (!thread->srcs[i])
			goto err_srcbuf;
	}
	thread->srcs[i] = NULL;

	/* allocate the array of RX buffer pointers */
	thread->dsts = kcalloc(dst_cnt + 1, sizeof(u8 *), GFP_KERNEL);
	if (!thread->dsts)
		goto err_dsts;

	/* allocate the RX buffers; the engine deposits data pulled from the FIFO here */
	for (i = 0; i < dst_cnt; i++) {
		thread->dsts[i] = kmalloc(test_buf_size, GFP_KERNEL);
		if (!thread->dsts[i])
			goto err_dstbuf;
	}
	thread->dsts[i] = NULL;

	set_user_nice(current, 10);

	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
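	/*
	 * Above: DMA_PREP_INTERRUPT asks for an interrupt/callback when the
	 * descriptor completes; DMA_CTRL_ACK marks the descriptor as
	 * acknowledged so the engine may recycle it afterwards.
	 */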

	ktime = ktime_get();
	while (!kthread_should_stop()) 
	{
	       
		struct dma_device *tx_dev = tx_chan->device;
		struct dma_device *rx_dev = rx_chan->device;
		
		struct dma_async_tx_descriptor *txd = NULL;
		struct dma_async_tx_descriptor *rxd = NULL;

		dma_addr_t dma_srcs[XILINX_DMATEST_BD_CNT];
		dma_addr_t dma_dsts[XILINX_DMATEST_BD_CNT];

		struct completion rx_cmp;
		struct completion tx_cmp;
		unsigned long rx_tmo = msecs_to_jiffies(300000 / 100); /* 3 s: RX takes longer */
		unsigned long tx_tmo = msecs_to_jiffies(30000  / 100); /* 300 ms */

		u8 align = 0;

		/* TX scatterlist */
		struct scatterlist tx_sg[XILINX_DMATEST_BD_CNT];
		/* RX scatterlist */
		struct scatterlist rx_sg[XILINX_DMATEST_BD_CNT];



		/* idle: nothing queued by the char device yet */
		if (!is_send_or_recv) {
		    msleep(1);
		    continue;
		}

		
		/* DMA requires aligned buffers; honor the larger of the two engines' restrictions */
		align = tx_dev->copy_align;
		if (rx_dev->copy_align > align)
			align = rx_dev->copy_align;

		if (1 << align > test_buf_size) {
			pr_err("%u-byte buffer too small for %d-byte alignment\n",
			       test_buf_size, 1 << align);
			break;
		}

		/* clamp to the buffer size (the original test derived a random length here) */
		len = min_t(unsigned int, send_or_recv_len, test_buf_size);
		len = (len >> align) << align;
		if (!len)
		    len = 1 << align;

		total_len += len;
		src_off = send_or_recv_len % (test_buf_size - len + 1);
		dst_off = send_or_recv_len % (test_buf_size - len + 1);

		src_off = (src_off >> align) << align;
		dst_off = (dst_off >> align) << align;
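		/*
		 * The offsets above keep the original test's modulo
		 * derivation, rounded down to the DMA alignment.
		 */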


		/* copy the staged user data into the TX buffer */
		if (1 == is_send_or_recv)
		{
		      /* src_cnt == 1, bd_cnt == 1 */
		      memcpy(thread->srcs[0] + src_off, gexchangebuf, len);
		
                      #ifdef MYDEBUG
		      printk("len=%d\tsrc_off=%d\tdst_off=%d\n",len,src_off,dst_off);
                      #endif

		      for (i = 0; i < src_cnt; i++) 
		      {
			    u8 *buf = thread->srcs[i] + src_off;

			    /*
			     * dma_map_single() returns the bus address the
			     * DMA engine needs; thread->srcs[i] is a kernel
			     * virtual address used by the CPU.
			     */
			   dma_srcs[i] = dma_map_single(tx_dev->dev, buf, len,
						     DMA_MEM_TO_DEV);
		      }

	        }


		if(2 == is_send_or_recv)
	        {

		    for (i = 0; i < dst_cnt; i++)
	            {

                       /* map the RX buffer; as above, the engine needs the bus address */
			dma_dsts[i] = dma_map_single(rx_dev->dev,
						     thread->dsts[i],
						     test_buf_size,
						     DMA_BIDIRECTIONAL);

		       //printk("dma_map_single over!\n");
		   }
		}

                
		/* initialize the TX/RX scatterlist tables */
		sg_init_table(tx_sg, bd_cnt);
		sg_init_table(rx_sg, bd_cnt);

		for (i = 0; i < bd_cnt; i++)
	       	{
			/* bus addresses */
			sg_dma_address(&tx_sg[i]) = dma_srcs[i];
			sg_dma_address(&rx_sg[i]) = dma_dsts[i] + dst_off;

			/* length of one DMA transfer */
			sg_dma_len(&tx_sg[i]) = len;
			sg_dma_len(&rx_sg[i]) = len;
		}

              
		/* prepare the TX scatter-gather descriptor */
		if (1 == is_send_or_recv)
		{

		     txd = tx_dev->device_prep_slave_sg(tx_chan, tx_sg, bd_cnt,
				DMA_MEM_TO_DEV, flags, NULL);

#ifdef MYDEBUG
		     printk("txd=%d\n",txd);
#endif

	 	     if (!txd) {
			for (i = 0; i < src_cnt; i++)
				dma_unmap_single(tx_dev->dev, dma_srcs[i], len,
						 DMA_MEM_TO_DEV);
			pr_warn("%s: #%u: prep error with src_off=0x%x ",
				thread_name, total_tests - 1, src_off);
			msleep(5);
			continue;
		    }


                    /* completion used to wait for the TX DMA */
	  	   init_completion(&tx_cmp);
 
                   /* the callback runs once the engine finishes the transfer */
	 	   txd->callback = dmatest_slave_tx_callback;
		   txd->callback_param = &tx_cmp;
		   tx_cookie = txd->tx_submit(txd);

		   if (dma_submit_error(tx_cookie)) {
			pr_warn("%s: #%u: submit error %d/%d with src_off=0x%x ",
				thread_name, total_tests - 1,
				rx_cookie, tx_cookie, src_off);
			pr_warn("dst_off=0x%x len=0x%x\n",
				dst_off, len);
			msleep(5);
			continue;
		   }

                   /*
                    * This kicks the DMA controller; the hardware then moves
                    * the data on its own using the descriptor above.
                    */
		  dma_async_issue_pending(tx_chan);

		  tx_tmo = wait_for_completion_timeout(&tx_cmp, tx_tmo);
                
                   /* wait for the TX completion, then check the cookie status */
		  status = dma_async_is_tx_complete(tx_chan, tx_cookie,
						  NULL, NULL);

		  if (tx_tmo == 0) {
			pr_warn("%s: #%u: tx test timed out\n",
				thread_name, total_tests - 1);
			continue;
		  } else if (status != DMA_COMPLETE) {
			pr_warn("%s: #%u: tx got completion callback, ",
				thread_name, total_tests - 1);
			pr_warn("but status is \'%s\'\n",
				status == DMA_ERROR ? "error" :
				"in progress");
			continue;
		  }


                    is_send_or_recv = 0;

#ifdef  MYDEBUG 
                    printk("iverson dma send  exec over!\n");
#endif
		    /* TODO: the TX mapping is never undone here (dma_unmap_single) */

		}
		else if(2 == is_send_or_recv)
		{
	
		     //receive sg
		     rxd = rx_dev->device_prep_slave_sg(rx_chan, rx_sg, bd_cnt,
                                   DMA_DEV_TO_MEM, flags, NULL);
	

#ifdef  MYDEBUG
                     printk("rxd=%d\n",rxd);
#endif

                     if (!rxd) {
                        for (i = 0; i < dst_cnt; i++)
                                dma_unmap_single(rx_dev->dev, dma_dsts[i],
                                                 test_buf_size,
                                                 DMA_BIDIRECTIONAL);
                       
                        pr_warn("dst_off=0x%x len=0x%x\n",
                                dst_off, len);
                        msleep(5);
                        continue;
                    }
		     

                   /* completion for the RX DMA; prepare the receive side first */
                     init_completion(&rx_cmp);
                    /* the callback runs once the engine finishes the transfer */
                    rxd->callback = dmatest_slave_rx_callback;
                    rxd->callback_param = &rx_cmp;
                    rx_cookie = rxd->tx_submit(rxd);		     


                    if (dma_submit_error(rx_cookie)) {
                        pr_warn("%s: #%u: submit error %d/%d with src_off=0x%x ",
                                thread_name, total_tests - 1,
                                rx_cookie, tx_cookie, src_off);
                        pr_warn("dst_off=0x%x len=0x%x\n",
                                dst_off, len);
                        msleep(5);
                        //failed_tests++;
                        continue;
                    }
		    

		    dma_async_issue_pending(rx_chan);

		    rx_tmo = wait_for_completion_timeout(&rx_cmp, rx_tmo);
        
                     /* wait for the RX transfer to finish */
		    status = dma_async_is_tx_complete(rx_chan, rx_cookie,
						  NULL, NULL);
                   
		    if (rx_tmo == 0) {
			pr_warn("%s: #%u: rx test timed out\n",
				thread_name, total_tests - 1);
			continue;
		    } else if (status != DMA_COMPLETE) {
			pr_warn("%s: #%u: rx got completion callback, ",
				thread_name, total_tests - 1);
			pr_warn("but status is \'%s\'\n",
				status == DMA_ERROR ? "error" :
				"in progress");
			continue;
	 	   }
               
               
                   /*
                    * Unmap what was mapped above; unmapping hands the buffer
                    * back to the CPU so the received data is coherent. Skip
                    * this and the buffer contents will be stale.
                    */
		    for (i = 0; i < dst_cnt; i++){
			/*
			 * DMA_BIDIRECTIONAL allows transfers in either
			 * direction; the platform guarantees this works,
			 * at some cost in performance.
			 */
			dma_unmap_single(rx_dev->dev, dma_dsts[i],
					 test_buf_size, DMA_BIDIRECTIONAL);
		    }


                      //src_cnt == 1,bd_cnt == 1
                     memcpy(gexchangebuf,thread->dsts[0] + dst_off,send_or_recv_len);
                     is_send_or_recv = 0;

#ifdef MYDEBUG
                     printk("dma read  exec over!\n");
#endif		     

                }



/*
		error_count = 0;
		start = ktime_get();
		pr_debug("%s: verifying source buffer...\n", thread_name);

                
                   //-- verify the received data against the sent data
                
		error_count += dmatest_verify(thread->srcs, 0, src_off,
				0, PATTERN_SRC, true);
		error_count += dmatest_verify(thread->srcs, src_off,
				src_off + len, src_off,
				PATTERN_SRC | PATTERN_COPY, true);
		error_count += dmatest_verify(thread->srcs, src_off + len,
				test_buf_size, src_off + len,
				PATTERN_SRC, true);

		pr_debug("%s: verifying dest buffer...\n",
			 thread->task->comm);
		error_count += dmatest_verify(thread->dsts, 0, dst_off,
				0, PATTERN_DST, false);
		error_count += dmatest_verify(thread->dsts, dst_off,
				dst_off + len, src_off,
				PATTERN_SRC | PATTERN_COPY, false);
		error_count += dmatest_verify(thread->dsts, dst_off + len,
				test_buf_size, dst_off + len,
				PATTERN_DST, false);
		diff = ktime_sub(ktime_get(), start);
		comparetime = ktime_add(comparetime, diff);

		if (error_count) {
			pr_warn("%s: #%u: %u errors with ",
				thread_name, total_tests - 1, error_count);
			pr_warn("src_off=0x%x dst_off=0x%x len=0x%x\n",
				src_off, dst_off, len);
			failed_tests++;
		} else {
			pr_debug("%s: #%u: No errors with ",
				 thread_name, total_tests - 1);
			pr_debug("src_off=0x%x dst_off=0x%x len=0x%x\n",
				 src_off, dst_off, len);
		
		}*/

	}

	ktime = ktime_sub(ktime_get(), ktime);
	ktime = ktime_sub(ktime, comparetime);
	ktime = ktime_sub(ktime, filltime);
	runtime = ktime_to_us(ktime);

	ret = 0;
	for (i = 0; thread->dsts[i]; i++)
		kfree(thread->dsts[i]);
err_dstbuf:
	kfree(thread->dsts);
err_dsts:
	for (i = 0; thread->srcs[i]; i++)
		kfree(thread->srcs[i]);
err_srcbuf:
	kfree(thread->srcs);
err_srcs:
	pr_notice("%s: terminating after %u tests, %u failures %llu iops %llu KB/s (status %d)\n",
		  thread_name, total_tests, failed_tests,
		  dmatest_persec(runtime, total_tests),
		  dmatest_KBs(runtime, total_len), ret);

	thread->done = true;
	wake_up(&thread_wait);

	return ret;
}



static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
{
	struct dmatest_slave_thread *thread;
	struct dmatest_slave_thread *_thread;
	int ret;

	list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
		ret = kthread_stop(thread->task);
		pr_debug("dmatest: thread %s exited with status %d\n",
			 thread->task->comm, ret);
		list_del(&thread->node);
		put_task_struct(thread->task);
		kfree(thread);
	}
	kfree(dtc);
}

static int dmatest_add_slave_threads(struct dmatest_chan *tx_dtc,
				     struct dmatest_chan *rx_dtc)
{
	struct dmatest_slave_thread *thread;
	struct dma_chan *tx_chan = tx_dtc->chan;
	struct dma_chan *rx_chan = rx_dtc->chan;
	int ret;

	thread = kzalloc(sizeof(struct dmatest_slave_thread), GFP_KERNEL);
	if (!thread) {
		pr_warn("dmatest: No memory for slave thread %s-%s\n",
			dma_chan_name(tx_chan), dma_chan_name(rx_chan));
		return -ENOMEM;
	}

	thread->tx_chan = tx_chan;
	thread->rx_chan = rx_chan;
	thread->type = (enum dma_transaction_type)DMA_SLAVE;

	/* Ensure that all previous writes are complete */
	smp_wmb();
	thread->task = kthread_run(dmatest_slave_func, thread, "%s-%s",
				   dma_chan_name(tx_chan),
				   dma_chan_name(rx_chan));
	ret = PTR_ERR(thread->task);
	if (IS_ERR(thread->task)) {
		pr_warn("dmatest: Failed to run thread %s-%s\n",
			dma_chan_name(tx_chan), dma_chan_name(rx_chan));
		kfree(thread);
		return ret;
	}

	/* srcbuf and dstbuf are allocated by the thread itself */
	get_task_struct(thread->task);
	list_add_tail(&thread->node, &tx_dtc->threads);

	/* Added one thread with 2 channels */
	return 1;
}

static int dmatest_add_slave_channels(struct dma_chan *tx_chan,
				      struct dma_chan *rx_chan)
{
	struct dmatest_chan *tx_dtc;
	struct dmatest_chan *rx_dtc;
	unsigned int thread_count = 0;

	tx_dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
	if (!tx_dtc) {
		pr_warn("dmatest: No memory for tx %s\n",
			dma_chan_name(tx_chan));
		return -ENOMEM;
	}

	rx_dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
	if (!rx_dtc) {
		pr_warn("dmatest: No memory for rx %s\n",
			dma_chan_name(rx_chan));
		kfree(tx_dtc);
		return -ENOMEM;
	}

	tx_dtc->chan = tx_chan;
	rx_dtc->chan = rx_chan;
	INIT_LIST_HEAD(&tx_dtc->threads);
	INIT_LIST_HEAD(&rx_dtc->threads);

	dmatest_add_slave_threads(tx_dtc, rx_dtc);
	thread_count += 1;

	pr_info("dmatest: Started %u threads using %s %s\n",
		thread_count, dma_chan_name(tx_chan), dma_chan_name(rx_chan));

	list_add_tail(&tx_dtc->node, &dmatest_channels);
	list_add_tail(&rx_dtc->node, &dmatest_channels);
	nr_channels += 2;
//	wait here for the test threads to finish:
//	if (iterations)
//		wait_event(thread_wait, !is_threaded_test_run(tx_dtc, rx_dtc));

	return 0;
}

static int xilinx_axidmatest_probe(struct platform_device *pdev)
{
	/*
	struct dma_chan *chan, *rx_chan;
	*/
	int err;

	global_tx_chan = dma_request_chan(&pdev->dev, "axidma0");
	if (IS_ERR(global_tx_chan)) {
		err = PTR_ERR(global_tx_chan);
		if (err != -EPROBE_DEFER)
			pr_err("xilinx_dmatest: No Tx channel\n");
		return err;
	}

	global_rx_chan = dma_request_chan(&pdev->dev, "axidma1");
	if (IS_ERR(global_rx_chan)) {
		err = PTR_ERR(global_rx_chan);
		if (err != -EPROBE_DEFER)
			pr_err("xilinx_dmatest: No Rx channel\n");
		goto free_tx;
	}

             

	/*
	 * Add the TX and RX test channels (disabled; kept from the original
	 * test module):
	 *
	 * err = dmatest_add_slave_channels(chan, rx_chan);
	 * if (err) {
	 *	pr_err("xilinx_dmatest: Unable to add channels\n");
	 *	goto free_rx;
	 * }
	 */

	return 0;

free_rx:
	dma_release_channel(global_rx_chan);
free_tx:
	dma_release_channel(global_tx_chan);

	return err;
}

static int xilinx_axidmatest_remove(struct platform_device *pdev)
{
	struct dmatest_chan *dtc, *_dtc;
	struct dma_chan *chan;

	list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) {
		list_del(&dtc->node);
		chan = dtc->chan;
		dmatest_cleanup_channel(dtc);
		pr_info("xilinx_dmatest: dropped channel %s\n",
			dma_chan_name(chan));
		dmaengine_terminate_all(chan);
		dma_release_channel(chan);
	}
	return 0;
}



#define DMA_LENGTH	        MY_MAX_BUF_SIZE

static struct device *mydevice = NULL;


static int major;

static struct class *dma_class   = NULL;
static int dma_init(void);
static void dma_exit(void);
static int dma_open(struct inode *inode,struct file *file);
static int dma_release(struct inode *inode,struct file *file);
static ssize_t dma_write(struct file *file,const char __user *buf, size_t count,loff_t *ppos);
static ssize_t dma_read(struct file *file,char __user *buf,size_t size,loff_t *ppos);

/* file_operations: the bridge between user space and this driver */
static struct file_operations dma_lops =
{
	.owner   = THIS_MODULE,
	.read    = dma_read,
	.open    = dma_open,
	.write   = dma_write,
	.release = dma_release,
};

/*
 * Initialization, called from module init
 */
static int dma_init(void)
{

    //int err,nouse;
    major=register_chrdev(0,"ps_dma_dev",&dma_lops);
    dma_class= class_create(THIS_MODULE,"ps_dma_dev");
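    /* note: on kernels v6.4+, class_create() takes only the name argument */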
    mydevice = device_create(dma_class,NULL,MKDEV(major,0),NULL,"ps_dma_dev");

#ifdef MYDEBUG    
    printk("major dev number= %d\n",major);
#endif
    
    return 0;
}

/* Exit, called from module exit */

static void dma_exit(void)
{
    unregister_chrdev(major,"ps_dma_dev");
    device_destroy(dma_class,MKDEV(major,0));
    class_destroy(dma_class);

    return ;
}

// open() handler

static int dma_open(struct inode *inode,struct file *file)
{

    int i;
    int src_cnt;
    int dst_cnt;
    int bd_cnt = XILINX_DMATEST_BD_CNT;       

    // was 11 in the original test (11 SG segments); reduced to 1, a single segment
    dst_cnt = bd_cnt;
    src_cnt = bd_cnt;


#ifdef MYDEBUG
  printk(" global_srcs = kcalloc\n");
#endif
	    
    // allocate the array of TX buffer pointers
    global_srcs = kcalloc(src_cnt + 1, sizeof(u8 *), GFP_KERNEL);
    if (!global_srcs)
        return -ENOMEM;

#ifdef MYDEBUG
   printk(" global_srcs[i] = kmalloc\n");
#endif
	    
     // allocate the TX buffers; data headed for the AXIS data FIFO is staged here
    for (i = 0; i < src_cnt; i++) {
        global_srcs[i] = kmalloc(test_buf_size, GFP_KERNEL);
        if (!global_srcs[i])
               return -ENOMEM;
    }
    global_srcs[i] = NULL;


#ifdef MYDEBUG
  printk("  global_dsts = kcalloc\n");
#endif

    // allocate the array of RX buffer pointers
    global_dsts = kcalloc(dst_cnt + 1, sizeof(u8 *), GFP_KERNEL);
    if (!global_dsts)
        return -ENOMEM;

#ifdef MYDEBUG
   printk("  global_dsts[i] = kmalloc\n");
#endif

    // allocate the RX buffers; the DMA engine deposits data pulled from the FIFO here
    for (i = 0; i < dst_cnt; i++) {
        global_dsts[i] = kmalloc(test_buf_size, GFP_KERNEL);
        if (!global_dsts[i])
              return -ENOMEM;
    }
    global_dsts[i] = NULL;


#ifdef MYDEBUG
    printk("DMA open\n");
#endif

    return 0;

}

// release() handler

static int dma_release(struct inode *inode,struct file *file)
{
      int i;

#ifdef MYDEBUG
    printk("DMA release\n");
#endif

      for (i = 0; global_dsts[i]; i++)
	     kfree(global_dsts[i]);

      kfree(global_dsts);

      for (i = 0; global_srcs[i]; i++)
	     kfree(global_srcs[i]);

      kfree(global_srcs);   

      return 0;
}



// write() handler
static ssize_t dma_write(struct file *file,const char __user *buf, size_t count,loff_t *ppos)
{
   static int cnt;

   struct dma_device *tx_dev = global_tx_chan->device;
   struct dma_device *rx_dev = global_rx_chan->device;

   struct dma_async_tx_descriptor *txd = NULL;
   dma_addr_t dma_srcs[XILINX_DMATEST_BD_CNT];

   struct completion tx_cmp;
   unsigned long tx_tmo = msecs_to_jiffies(30000  / 100); /* 300 ms */

   u8 align = 0;

   /* TX scatterlist */
   struct scatterlist tx_sg[XILINX_DMATEST_BD_CNT];

   int i;
   int src_cnt;
   int dst_cnt;
   int bd_cnt = XILINX_DMATEST_BD_CNT;

   unsigned int src_off, dst_off, len;
   unsigned long long total_len = 0;

   enum dma_status status;
   enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
   dma_cookie_t tx_cookie;

#ifdef MYDEBUG
   printk("dma write start !..................................%d\n",++cnt);
#endif

   if(count > DMA_LENGTH)
   {
#ifdef MYDEBUG
      printk("the write request is too large!\n");
#endif
      return 0;
   }

   dst_cnt = bd_cnt;
   src_cnt = bd_cnt;


   /* DMA requires aligned buffers; honor the larger of the two engines' restrictions */
   align = tx_dev->copy_align;
   if (rx_dev->copy_align > align)
	align = rx_dev->copy_align;

   if (1 << align > test_buf_size) {
	//pr_err("%u-byte buffer too small for %d-byte alignment\n",
	//       test_buf_size, 1 << align);
	return 0;
   }

   /* clamp to the buffer size (the original test derived a random length here) */
   len = min_t(size_t, count, (size_t)test_buf_size);
   len = (len >> align) << align;
   if (!len)
      len = 1 << align;

   total_len += len;
   src_off = count % (test_buf_size - len + 1);
   dst_off = count % (test_buf_size - len + 1);

   src_off = (src_off >> align) << align;
   dst_off = (dst_off >> align) << align;



   /* src_cnt == 1, bd_cnt == 1: copy the payload in from user space */
   if (copy_from_user(global_srcs[0] + src_off, buf, len))
      return -EFAULT;

   #ifdef MYDEBUG
   printk("dma write len=%d\tsrc_off=%d\tdst_off=%d\n",len,src_off,dst_off);
   #endif

   for (i = 0; i < src_cnt; i++) 
   {
      u8 *tmp_buf = global_srcs[i] + src_off;

    /*
     * dma_map_single() returns the bus address the DMA engine needs;
     * global_srcs[i] is a kernel virtual address used by the CPU.
     */
     dma_srcs[i] = dma_map_single(tx_dev->dev, tmp_buf, len,
			     DMA_MEM_TO_DEV);
   }



   /* initialize the TX scatterlist */
   sg_init_table(tx_sg, bd_cnt);

   for (i = 0; i < bd_cnt; i++)
   {
         /* bus address */
	sg_dma_address(&tx_sg[i]) = dma_srcs[i];

        /* length of one DMA transfer */
	sg_dma_len(&tx_sg[i]) = len;
   }
	
	
   txd = tx_dev->device_prep_slave_sg(global_tx_chan, tx_sg, bd_cnt,
	DMA_MEM_TO_DEV, flags, NULL);

   #ifdef MYDEBUG
   printk("txd=%d\n",txd);
   #endif

   if (!txd) {
      for (i = 0; i < src_cnt; i++)
	  dma_unmap_single(tx_dev->dev, dma_srcs[i], len,
			 DMA_MEM_TO_DEV);
     return 0;
   }


   /* completion used to wait for the TX DMA */
   init_completion(&tx_cmp);

   /* the callback runs once the engine finishes the transfer */
   txd->callback = dmatest_slave_tx_callback;
   txd->callback_param = &tx_cmp;
   tx_cookie = txd->tx_submit(txd);

   if (dma_submit_error(tx_cookie)) {

     return 0;
   }

   /*
    * This kicks the DMA controller; the hardware then moves the data
    * on its own using the descriptor above.
    */
   dma_async_issue_pending(global_tx_chan);

   
   tx_tmo = wait_for_completion_timeout(&tx_cmp, tx_tmo);

  
   // wait for the TX DMA to finish (completion + cookie status)
   status = dma_async_is_tx_complete(global_tx_chan, tx_cookie,
			  NULL, NULL);

   if (tx_tmo == 0) {
    //pr_warn("%s: #%u: tx test timed out\n",
    //thread_name, total_tests - 1);
    //continue;
    
   #ifdef MYDEBUG
   printk("tx test timed out\n");
   #endif

     return 0;
   } 
   else if (status != DMA_COMPLETE) {

     return 0;
   }

   /* unmap the TX buffers now that the transfer is done */
   for (i = 0; i < src_cnt; i++)
      dma_unmap_single(tx_dev->dev, dma_srcs[i], len,
		       DMA_MEM_TO_DEV);


   #ifdef MYDEBUG
   printk("dma write ok!\n"); 
   #endif

   return count;
}


/*
 * read() handler
 * Note: the DMA engine moves data in 32-bit words
 */
static ssize_t dma_read(struct file *file,char __user *buf,size_t size,loff_t *ppos)
{
    static int cnt;

    struct dma_device *tx_dev = global_tx_chan->device;
    struct dma_device *rx_dev = global_rx_chan->device;

    struct dma_async_tx_descriptor *rxd = NULL;
    dma_addr_t dma_dsts[XILINX_DMATEST_BD_CNT];

    struct completion rx_cmp;
    unsigned long rx_tmo = msecs_to_jiffies(300000 / 100); /* 3 s: RX takes longer */

    u8 align = 0;

    int i;
    int src_cnt;
    int dst_cnt;
    int bd_cnt = XILINX_DMATEST_BD_CNT;

    unsigned int src_off, dst_off, len;
    unsigned long long total_len = 0;

    enum dma_status status;
    enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
    dma_cookie_t rx_cookie;

    /* RX scatterlist */
    struct scatterlist rx_sg[XILINX_DMATEST_BD_CNT];

#ifdef MYDEBUG
    printk("dma read start !..................................%d\n",++cnt);
#endif

    if(size > DMA_LENGTH)
    {
#ifdef MYDEBUG
	printk("the read request is too large!\n");
#endif
	return 0;
    }

    dst_cnt = bd_cnt;
    src_cnt = bd_cnt;
        
	/* DMA requires aligned buffers; honor the larger of the two engines' restrictions */
	align = tx_dev->copy_align;
	if (rx_dev->copy_align > align)
		align = rx_dev->copy_align;

	if (1 << align > test_buf_size) {
		//pr_err("%u-byte buffer too small for %d-byte alignment\n",
		//       test_buf_size, 1 << align);
		return 0;
	}

	/* clamp to the buffer size (the original test derived a random length here) */
	len = min_t(size_t, size, (size_t)test_buf_size);
	len = (len >> align) << align;
	if (!len)
	    len = 1 << align;

	total_len += len;
	src_off = size % (test_buf_size - len + 1);
	dst_off = size % (test_buf_size - len + 1);

	src_off = (src_off >> align) << align;
	dst_off = (dst_off >> align) << align;
		
		
        for (i = 0; i < dst_cnt; i++)
        {
	    /*
	     * dma_map_single() returns the bus address the DMA engine
	     * needs; global_dsts[i] is the CPU's kernel virtual address.
	     */
	    dma_dsts[i] = dma_map_single(rx_dev->dev,
					 global_dsts[i],
					 test_buf_size,
					 DMA_BIDIRECTIONAL);
       }		


        /* initialize the RX scatterlist */
	sg_init_table(rx_sg, bd_cnt);

	for (i = 0; i < bd_cnt; i++)
       	{
                 /* bus address */
		sg_dma_address(&rx_sg[i]) = dma_dsts[i] + dst_off;

                /* length of one DMA transfer */
		sg_dma_len(&rx_sg[i]) = len;
	}


        //receive sg
        rxd = rx_dev->device_prep_slave_sg(global_rx_chan, rx_sg, bd_cnt,
                      DMA_DEV_TO_MEM, flags, NULL);

#ifdef  MYDEBUG
        printk("ret rxd=%d\n",rxd);
#endif

       if (!rxd) {
           for (i = 0; i < dst_cnt; i++)
                dma_unmap_single(rx_dev->dev, 
				 dma_dsts[i],
                                 test_buf_size,
                                 DMA_BIDIRECTIONAL);
       
           //pr_warn("dst_off=0x%x len=0x%x\n",
           //     dst_off, len);
           return 0;
       }
     

      /* completion for the RX DMA; prepare the receive side first */
      init_completion(&rx_cmp);
      /* the callback runs once the engine finishes the transfer */
      rxd->callback = dmatest_slave_rx_callback;
      rxd->callback_param = &rx_cmp;
      rx_cookie = rxd->tx_submit(rxd);		     


      if (dma_submit_error(rx_cookie)) {
        //pr_warn("%s: #%u: submit error %d/%d with src_off=0x%x ",
        //        thread_name, total_tests - 1,
        //        rx_cookie, tx_cookie, src_off);
        //pr_warn("dst_off=0x%x len=0x%x\n",
        //        dst_off, len);
        return 0;
      }
    

     dma_async_issue_pending(global_rx_chan);

     rx_tmo = wait_for_completion_timeout(&rx_cmp, rx_tmo);

     // wait for the RX transfer to finish
     status = dma_async_is_tx_complete(global_rx_chan, rx_cookie,
				  NULL, NULL);
   
     if (rx_tmo == 0) {
	//pr_warn("%s: #%u: rx test timed out\n",
	//	thread_name, total_tests - 1);
	
#ifdef MYDEBUG
       printk("rx test timed out\n");	     
#endif	     

	return 0;
     } else if (status != DMA_COMPLETE) {
	//pr_warn("%s: #%u: rx got completion callback, ",
	//	thread_name, total_tests - 1);
	//pr_warn("but status is \'%s\'\n",
	//	status == DMA_ERROR ? "error" :
	//	"in progress");
	return 0;
     }


    /*
     * Unmap what was mapped above; this hands the buffer back to the
     * CPU so the received data is coherent.
     */
    for (i = 0; i < dst_cnt; i++){
	/*
	 * DMA_BIDIRECTIONAL allows the transfer to go either way; the
	 * platform guarantees it works, at some performance cost.
	 */
	dma_unmap_single(rx_dev->dev, dma_dsts[i],
			 test_buf_size, DMA_BIDIRECTIONAL);
    }


    if(copy_to_user(buf, global_dsts[0] + dst_off, len))
    {
       return -EFAULT;
    }
	

#ifdef MYDEBUG
    printk("\ndma read ok!\n");
#endif


    return len;
}



static const struct of_device_id xilinx_axidmatest_of_ids[] = {
	{ .compatible = "xlnx,axi-dma-test-1.00.a",},
	{}
};
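
/* allow the module to autoload when the DT node matches */
MODULE_DEVICE_TABLE(of, xilinx_axidmatest_of_ids);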

static struct platform_driver xilinx_axidmatest_driver = {
	.driver = {
		.name = "xilinx_axidmatest",
		.of_match_table = xilinx_axidmatest_of_ids,
	},
	.probe = xilinx_axidmatest_probe,
	.remove = xilinx_axidmatest_remove,
};

static int __init axidma_init(void)
{
        dma_init();

	return platform_driver_register(&xilinx_axidmatest_driver);
}
late_initcall(axidma_init);

static void __exit axidma_exit(void)
{
	platform_driver_unregister(&xilinx_axidmatest_driver);

	dma_exit();
}
module_exit(axidma_exit);

MODULE_AUTHOR("Xilinx, Inc.");
MODULE_DESCRIPTION("Xilinx AXI DMA Test Client");
MODULE_LICENSE("GPL v2");
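
Once the module is loaded and the device tree node matches, the probe acquires the two DMA channels, while dma_init has already registered the ps_dma_dev character device. User space can then drive the DMA path with plain read()/write() calls, as the test program below does.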

3. User-space test application

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>

void delay(void)
{
    int i,j;
    for(i=0;i<20000;i++)
        for(j=0;j<10000;j++);
}


unsigned char rxbuf[1024 * 768 * 4 + 4] = {0};


int main(int argc , char ** argv)
{
    int fd;
    int i = 0;
    int readcnt;
    
    printf("main start...\n");

    fd = open("/dev/ps_dma_dev",O_RDWR);
    if(fd < 0) 
    {
       printf("can not open file\n");
       return -1;
    }
    else
    { 
      printf("open file sucuss\n");
    }
      
    delay();
    
       
   
    struct timeval start, end;  // define 2 struct timeval variables

    //read test
    while(1)
    {
	    
	memset(rxbuf,0,sizeof(rxbuf));    

	gettimeofday(&start, NULL); // get the beginning time
        //bytes,fd,rxbuf,bytes
        readcnt = read(fd,rxbuf,sizeof(rxbuf)); 

	gettimeofday(&end, NULL);  // get the end time


	if(readcnt > 0)
	{
             long long total_time = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); // get the run time by microsecond

	     printf("readcnt=%d\ttime=%lld ms\n", readcnt,total_time / 1000);
	     printf("bytes[0--23]=%08x\t%08x\t%08x\t%08x\t%08x\t%08x\n",
			     rxbuf[0] << 24 |
			     rxbuf[1] << 16 |
			     rxbuf[2] << 8  |
			     rxbuf[3],
                             rxbuf[4] << 24 |
                             rxbuf[5] << 16 |
                             rxbuf[6] << 8  |
                             rxbuf[7],
			     rxbuf[8] << 24 |
                             rxbuf[9] << 16 |
                             rxbuf[10] << 8  |
                             rxbuf[11],
                             rxbuf[12] << 24 |
                             rxbuf[13] << 16 |
                             rxbuf[14] << 8  |
                             rxbuf[15],
                             rxbuf[16] << 24 |
                             rxbuf[17] << 16 |
                             rxbuf[18] << 8  |
                             rxbuf[19],
                             rxbuf[20] << 24 |
                             rxbuf[21] << 16 |
                             rxbuf[22] << 8  |
                             rxbuf[23]);

                 /*
                 for(i =0;i<100;i++)
		 {
		    printf("%02x ",rxbuf[i * 4 + 3]);	 
		 }
		 printf("==\n");*/
	}
	
    }

    return 0;
}
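
The program above only exercises the read path. Below is a minimal write-then-read sketch; it assumes the PL design loops AXI-Stream data back (for example through an AXIS data FIFO) and that the transfer length survives the driver's alignment rounding:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define XFER_LEN 4096

int main(void)
{
    unsigned char tx[XFER_LEN], rx[XFER_LEN];
    int i, fd;

    fd = open("/dev/ps_dma_dev", O_RDWR);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    /* fill the TX buffer with a recognizable ramp pattern */
    for (i = 0; i < XFER_LEN; i++)
        tx[i] = i & 0xff;

    if (write(fd, tx, XFER_LEN) != XFER_LEN)     /* MM2S: PS -> PL */
        fprintf(stderr, "short write\n");
    if (read(fd, rx, XFER_LEN) <= 0)             /* S2MM: PL -> PS */
        perror("read");

    printf("loopback %s\n", memcmp(tx, rx, XFER_LEN) ? "MISMATCH" : "ok");
    close(fd);
    return 0;
}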
