前几天实验了一下XILINX官方提供的UBUNTU平台下的PCIE驱动,以下文字做个简单记录和总结。时间有限,先简单写写,以后可能再补充。
1,在LINUX下安装驱动后在/dev/目录里面多了十多个xdma0_开头的设备。
设备一:xdma0_c2h_0设备:xdma0_c2h_0 是一个只读设备,用open函数打开后直接调用read函数进行读,如果FPGA端接的不是AXI STREAM,而是带有地址的AXI 接口,可以调用lseek函数指定起始地址。操作完毕后用close函数关闭文件。以下代码是我做的测试速度的代码,展示了xdma0_c2h_0这个设备的使用。
/*
* This file is part of the Xilinx DMA IP Core driver tool for Linux
*
* Copyright (c) 2016-present, Xilinx, Inc.
* All rights reserved.
*
* This source code is licensed under BSD-style license (found in the
* LICENSE file in the root directory of this source tree)
*/
//gcc h2c.c -lrt -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D_LARGE_FILE_SOURCE
#define _BSD_SOURCE
#define _XOPEN_SOURCE 500
#include <assert.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
/* Subtract timespec t2 from t1
*
* Both t1 and t2 must already be normalized
* i.e. 0 <= nsec < 1000000000
*/
/* Report whether a timespec is normalized.
 *
 * Returns 0 when 0 <= t->tv_nsec < 1000000000, and -1 otherwise.
 * Only the nanosecond field is inspected.
 */
static int timespec_check(struct timespec *t)
{
    int in_range = (t->tv_nsec >= 0) && (t->tv_nsec < 1000000000);

    return in_range ? 0 : -1;
}
/* Compute t1 -= t2 in place.
 *
 * Both operands are validated with timespec_check() first. If either one is
 * not normalized, a diagnostic is written to stderr and t1 is left untouched.
 * After the field-wise subtraction the nanosecond field is carried or
 * borrowed back into the range [0, 1000000000).
 */
void timespec_sub(struct timespec *t1, struct timespec *t2)
{
    if (timespec_check(t1) < 0) {
        fprintf(stderr, "invalid time #1: %lld.%.9ld.\n",
                (long long)t1->tv_sec, t1->tv_nsec);
        return;
    }

    if (timespec_check(t2) < 0) {
        fprintf(stderr, "invalid time #2: %lld.%.9ld.\n",
                (long long)t2->tv_sec, t2->tv_nsec);
        return;
    }

    t1->tv_sec = t1->tv_sec - t2->tv_sec;
    t1->tv_nsec = t1->tv_nsec - t2->tv_nsec;

    /* Borrow one second when the nanosecond difference went negative. */
    if (t1->tv_nsec < 0) {
        t1->tv_nsec += 1000000000;
        t1->tv_sec -= 1;
        return;
    }

    /* Carry one second when the nanosecond field overflowed. */
    if (t1->tv_nsec >= 1000000000) {
        t1->tv_nsec -= 1000000000;
        t1->tv_sec += 1;
    }
}
/* File descriptor of the card-to-host DMA device opened by test_h2c(). */
int fpga_fd_c2h ;

/*
 * Measure read throughput on /dev/xdma0_c2h_0.
 *
 * Despite its name, this function exercises the card-to-host (c2h) channel:
 * it issues `loop` read() calls of `size` bytes each into a 4096-byte-aligned
 * buffer, times the whole run with CLOCK_MONOTONIC and prints the amount of
 * data actually transferred together with the resulting rate.
 *
 * Returns 0 when every read succeeded, 1 when the device could not be
 * opened, the buffer could not be allocated, or a read failed.
 */
int test_h2c(){
    static const char dev_path[] = "/dev/xdma0_c2h_0";
    struct timespec ts_start, ts_end;
    void *allocated = NULL;
    long long size = 1024 * 1024 * 1;
    long long loop = 1024 * 40;
    long long total_bytes = 0;
    long long i;
    double seconds, gbytes, rate;
    int status = 0;

    fpga_fd_c2h = open(dev_path, O_RDWR | O_NONBLOCK);
    if (fpga_fd_c2h < 0) {
        /* perror first: printf may overwrite errno. */
        perror("open");
        printf("can not open device %s\n", dev_path);
        return (1);
    }

    /* posix_memalign reports failure through its return value. */
    if (posix_memalign(&allocated, 4096, (size_t)(size + 4096)) != 0
        || allocated == NULL) {
        printf("can not alloc memor %lld Bytes \n", size);
        close(fpga_fd_c2h);
        return (1);
    }

    printf("start read \n");
    clock_gettime(CLOCK_MONOTONIC, &ts_start);
    for (i = 0; i < loop; ++i) {
        ssize_t got = read(fpga_fd_c2h, allocated, (size_t)size);

        if (got < 0) {
            perror("read");
            status = 1;
            break;
        }
        /* Count what was really transferred; read() may return short. */
        total_bytes += got;
    }
    clock_gettime(CLOCK_MONOTONIC, &ts_end);

    timespec_sub(&ts_end, &ts_start);
    seconds = (double)ts_end.tv_sec + (double)ts_end.tv_nsec / 1e9;
    gbytes = (double)total_bytes / (1024.0 * 1024.0 * 1024.0);
    printf("%f GBytes, %f s \n", gbytes, seconds);

    /* Avoid dividing by zero if the clock did not advance. */
    rate = (seconds > 0.0) ? gbytes / seconds : 0.0;
    printf("Rate is %2.3f G Byte/second \n", rate);

    free(allocated);
    close(fpga_fd_c2h);
    return status;
}
/* Program entry point: run the throughput test once and exit with status 0. */
int main(void)
{
    test_h2c();
    return 0;
}
设备二:xdma0_h2c_0设备:xdma0_h2c_0 是一个只写设备,用open函数打开后直接调用write函数进行写,如果FPGA端接的不是AXI STREAM,而是带有地址的AXI 接口,可以调用lseek函数指定起始地址。操作完毕后用close函数关闭文件。以下代码是我做的测试速度的代码,展示了xdma0_h2c_0这个设备的使用。
/*
* This file is part of the Xilinx DMA IP Core driver tool for Linux
*
* Copyright (c) 2016-present, Xilinx, Inc.
* All rights reserved.
*
* This source code is licensed under BSD-style license (found in the
* LICENSE file in the root directory of this source tree)
*/
//gcc h2c.c -lrt -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D_LARGE_FILE_SOURCE
#define _BSD_SOURCE
#define _XOPEN_SOURCE 500
#include <assert.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
/* Subtract timespec t2 from t1
*
* Both t1 and t2 must already be normalized
* i.e. 0 <= nsec < 1000000000
*/
/* Validate the nanosecond field of a timespec.
 *
 * Returns -1 if t->tv_nsec lies outside [0, 1000000000), 0 otherwise.
 */
static int timespec_check(struct timespec *t)
{
    if (t->tv_nsec >= 0 && t->tv_nsec < 1000000000) {
        return 0;
    }
    return -1;
}
/* Subtract t2 from t1, storing the difference in t1.
 *
 * Each operand is checked with timespec_check(); an operand that is not
 * normalized produces a message on stderr and the function returns without
 * modifying t1. Otherwise seconds and nanoseconds are subtracted
 * field by field and the nanosecond result is folded back into
 * [0, 1000000000) by adjusting the seconds.
 */
void timespec_sub(struct timespec *t1, struct timespec *t2)
{
    if (timespec_check(t1) < 0) {
        fprintf(stderr, "invalid time #1: %lld.%.9ld.\n",
                (long long)t1->tv_sec, t1->tv_nsec);
        return;
    }

    if (timespec_check(t2) < 0) {
        fprintf(stderr, "invalid time #2: %lld.%.9ld.\n",
                (long long)t2->tv_sec, t2->tv_nsec);
        return;
    }

    t1->tv_sec = t1->tv_sec - t2->tv_sec;
    t1->tv_nsec = t1->tv_nsec - t2->tv_nsec;

    if (t1->tv_nsec < 0) {
        /* Borrow a second. */
        t1->tv_sec -= 1;
        t1->tv_nsec += 1000000000;
    } else if (t1->tv_nsec >= 1000000000) {
        /* Carry a second. */
        t1->tv_sec += 1;
        t1->tv_nsec -= 1000000000;
    }
}
/* File descriptor of the host-to-card DMA device opened by test_h2c(). */
int fpga_fd_h2c ;

/*
 * Measure write throughput on /dev/xdma0_h2c_0.
 *
 * Issues `loop` write() calls of `size` bytes each from a 4096-byte-aligned,
 * zero-filled buffer, times the whole run with CLOCK_MONOTONIC and prints the
 * amount of data actually transferred together with the resulting rate.
 *
 * Returns 0 when every write succeeded, 1 when the device could not be
 * opened, the buffer could not be allocated, or a write failed.
 */
int test_h2c(){
    static const char dev_path[] = "/dev/xdma0_h2c_0";
    struct timespec ts_start, ts_end;
    void *allocated = NULL;
    long long size = 1024 * 1024 * 1;
    long long loop = 1024 * 40;
    long long total_bytes = 0;
    long long i;
    double seconds, gbytes, rate;
    int status = 0;

    fpga_fd_h2c = open(dev_path, O_RDWR);
    if (fpga_fd_h2c < 0) {
        /* perror first: printf may overwrite errno. */
        perror("open");
        printf("can not open device %s\n", dev_path);
        return (1);
    }

    /* posix_memalign reports failure through its return value. */
    if (posix_memalign(&allocated, 4096, (size_t)(size + 4096)) != 0
        || allocated == NULL) {
        printf("can not alloc memor %lld Bytes \n", size);
        close(fpga_fd_h2c);
        return (1);
    }
    /* Send defined data instead of whatever the allocator handed back. */
    memset(allocated, 0, (size_t)size);

    printf("start write \n");
    clock_gettime(CLOCK_MONOTONIC, &ts_start);
    for (i = 0; i < loop; ++i) {
        ssize_t sent = write(fpga_fd_h2c, allocated, (size_t)size);

        if (sent < 0) {
            perror("write");
            status = 1;
            break;
        }
        /* Count what was really transferred; write() may return short. */
        total_bytes += sent;
    }
    clock_gettime(CLOCK_MONOTONIC, &ts_end);

    timespec_sub(&ts_end, &ts_start);
    seconds = (double)ts_end.tv_sec + (double)ts_end.tv_nsec / 1e9;
    gbytes = (double)total_bytes / (1024.0 * 1024.0 * 1024.0);
    printf("%f GBytes, %f s \n", gbytes, seconds);

    /* Avoid dividing by zero if the clock did not advance. */
    rate = (seconds > 0.0) ? gbytes / seconds : 0.0;
    printf("Rate is %2.3f G Byte/second \n", rate);

    free(allocated);
    close(fpga_fd_h2c);
    return status;
}
/* Program entry point: run the throughput test once and exit with status 0. */
int main(void)
{
    test_h2c();
    return 0;
}
设备三:xdma0_user设备:这个设备用于读写挂在AXI SLAVE接口上的IP的寄存器。上面两个设备对应的是数据流,这个设备对应的是用户外设寄存器。
#ifdef __cplusplus
extern "C" {
#endif
#include <assert.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <pthread.h>
#include <semaphore.h>
#include <stdarg.h>
#include <syslog.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/sysinfo.h>
#include <unistd.h>
#include <dirent.h>
#include <string.h>
/*
 * Byte-order helpers for values exchanged with the device.
 *
 * ltohl / ltohs: little-endian to host order, 32-bit / 16-bit
 * htoll / htols: host order to little-endian, 32-bit / 16-bit
 *
 * On a little-endian host all four are no-ops; on a big-endian host they
 * byte-swap.
 * NOTE(review): there is no #else branch, so the macros stay undefined if
 * __BYTE_ORDER matches neither case, and no header that is guaranteed to
 * provide __BYTE_ORDER / __bswap_32 / __bswap_16 is included explicitly in
 * the visible include list - confirm they arrive indirectly.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define ltohl(x) (x)
# define ltohs(x) (x)
# define htoll(x) (x)
# define htols(x) (x)
#elif __BYTE_ORDER == __BIG_ENDIAN
# define ltohl(x) __bswap_32(x)
# define ltohs(x) __bswap_16(x)
# define htoll(x) __bswap_32(x)
# define htols(x) __bswap_16(x)
#endif
/* Length in bytes (1 MiB) of the window that pcie_init() maps. */
#define MAP_SIZE (1024*1024UL)
/* Bit mask covering offsets inside a MAP_SIZE window. */
#define MAP_MASK (MAP_SIZE - 1)
/* Byte offset written by write_io(); presumably an LED register - confirm against the FPGA design. */
#define LED_BASE 0x30000
/* Byte offset read by read_io(); presumably a button register - confirm against the FPGA design. */
#define BTN_BASE 0x40000
/* Base address of the mapping created by pcie_init(); register offsets are added to it. */
void *control_base;
/* File descriptor for /dev/xdma0_user, set by pcie_init() and closed by pcie_deinit(). */
int control_fd;
/*
 * Open the device node `filename` read/write with O_SYNC.
 *
 * Returns the file descriptor on success. On failure the reason is reported
 * on stderr, "open control error" is printed on stdout and -1 is returned.
 * The parameter is const-qualified because the path is only read (callers
 * pass string literals).
 */
static int open_control(const char *filename)
{
    int fd = open(filename, O_RDWR | O_SYNC);

    if (fd < 0) {
        /* perror first so printf cannot overwrite errno. */
        perror(filename);
        printf("open control error\n");
        return -1;
    }
    return fd;
}
static void *mmap_control(int fd,long mapsize)
{
void *vir_addr;
vir_addr = mmap(0, mapsize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
return vir_addr;
}
/*
 * Store a 32-bit value at byte offset `offset` from control_base.
 *
 * The value is converted from host to little-endian order with htoll()
 * before the store. The access goes through a volatile pointer so the
 * compiler cannot drop or merge the store to the mapped register, and the
 * address is computed on a byte pointer because arithmetic on void * is
 * not standard C.
 * NOTE(review): assumes pcie_init() succeeded and that `offset` is 4-byte
 * aligned and inside the mapped window - nothing here checks that.
 */
void write_control(int offset,uint32_t val)
{
    volatile uint32_t *reg =
        (volatile uint32_t *)((uint8_t *)control_base + offset);

    *reg = htoll(val);
}
/*
 * Load the 32-bit value at byte offset `offset` from control_base.
 *
 * The raw value is converted from little-endian to host order with ltohl()
 * and returned. The access goes through a volatile pointer so every call
 * performs a real load from the mapped register, and the address is
 * computed on a byte pointer because arithmetic on void * is not
 * standard C.
 * NOTE(review): assumes pcie_init() succeeded and that `offset` is 4-byte
 * aligned and inside the mapped window - nothing here checks that.
 */
uint32_t read_control(int offset)
{
    const volatile uint32_t *reg =
        (const volatile uint32_t *)((uint8_t *)control_base + offset);
    uint32_t raw = *reg;

    return ltohl(raw);
}
/* Write `val` to the register at LED_BASE via write_control(). */
void write_io(uint32_t val)
{
    const int reg_offset = LED_BASE;

    write_control(reg_offset, val);
}
/* Return the current value of the register at BTN_BASE via read_control(). */
uint32_t read_io(void)
{
    uint32_t reg_value = read_control(BTN_BASE);

    return reg_value;
}
/*
 * Open /dev/xdma0_user and map MAP_SIZE bytes of it at control_base.
 *
 * Returns 1 on success, -5 if the device cannot be opened and -6 if the
 * mapping fails. On a mapping failure the descriptor is closed again and
 * control_base is reset to NULL so no invalid address is left behind.
 */
int pcie_init(void)
{
    control_fd = open_control("/dev/xdma0_user");
    if (control_fd < 0)
        return -5;

    control_base = mmap_control(control_fd, MAP_SIZE);
    /* mmap signals failure with MAP_FAILED, not NULL; reject both. */
    if (control_base == MAP_FAILED || control_base == NULL) {
        control_base = NULL;
        close(control_fd);
        control_fd = -1;
        return -6;
    }
    return 1;
}
void pcie_deinit()
{
close(control_fd);
}
#ifdef __cplusplus
}
#endif
其实就是先用open函数打开设备获得fd(关闭时对应使用close),之后使用mmap函数,将fd映射成虚拟基地址,之后对寄存器的操作就是基于这个虚拟的起始地址加上偏移量后用指针进行读写。这其实和我们在zynq上跑的linux使用mmap函数映射寄存器有点相似,只是那里传给mmap的是设备的物理地址,这里传给mmap的是文件的句柄(暂且使用句柄一词)。其中htoll是将数据调整成小端字节序,X86,AMD64,以及ARM平台都是小端,因此在这些平台上这个宏可以省掉。
设备四:xdma0_control设备:这个设备用于读写Xilinx的PCIE IP自身的寄存器。操作方式和xdma0_user类似,这里不放代码。
另外还有16个event设备,这里我们暂时不做研究。
我用下图来直观展示这四个文件对应的操作内容:
{{aAxvOXMOIvVUoXMxvoxiowMwWV8xxWTxoxOIOVIUUOvwVOUiIoUvvTMMVMwovWHWX8vOUOVU8wMTWWoOXwTMVTwHmHo8XMmMOXIXMvIwmixUIiUxiOMoiHIoVU8VvmvIWXTvvOvv8xvMovOWMOTxUvMT8UmooOZz}}