#include"spdk/stdinc.h"#include"spdk/nvme.h"#include"spdk/vmd.h"#include"spdk/nvme_zns.h"#include"spdk/env.h"#include<sys/time.h>struct ctrlr_entry {struct spdk_nvme_ctrlr *ctrlr;TAILQ_ENTRY(ctrlr_entry) link;//尾队列char name[1024];};struct ns_entry {struct spdk_nvme_ctrlr *ctrlr;struct spdk_nvme_ns *ns;TAILQ_ENTRY(ns_entry) link;//尾队列struct spdk_nvme_qpair *qpair;};staticTAILQ_HEAD(, ctrlr_entry) g_controllers =TAILQ_HEAD_INITIALIZER(g_controllers);//初始化控制器队列头部staticTAILQ_HEAD(, ns_entry) g_namespaces =TAILQ_HEAD_INITIALIZER(g_namespaces);//初始化namespace队列头部static bool g_vmd = false;//是否使用vmd(卷管理设备)staticvoidregister_ns(struct spdk_nvme_ctrlr *ctrlr,struct spdk_nvme_ns *ns){struct ns_entry *entry;if(!spdk_nvme_ns_is_active(ns)){return;}
entry =malloc(sizeof(struct ns_entry));if(entry ==NULL){perror("ns_entry malloc error");exit(1);}
entry->ctrlr = ctrlr;
entry->ns = ns;TAILQ_INSERT_TAIL(&g_namespaces, entry, link);//插入到队列尾部printf(" Namespace ID: %d size: %juGB\n",spdk_nvme_ns_get_id(ns),spdk_nvme_ns_get_size(ns)/1000000000);}struct hello_world_sequence {struct ns_entry *ns_entry;char*buf;unsigned using_cmb_io;int is_completed;};staticvoidread_complete(void*arg,conststruct spdk_nvme_cpl *completion){struct hello_world_sequence *sequence = arg;/* Assume the I/O was successful */
sequence->is_completed =1;//假设该read操作已经完成/* See if an error occurred. If so, display information
* about it, and set completion value so that I/O
* caller is aware that an error occurred.
*/if(spdk_nvme_cpl_is_error(completion)){//检查nvme完成是否错误//打印nvme完成队列条目的内容spdk_nvme_qpair_print_completion(sequence->ns_entry->qpair,(struct spdk_nvme_cpl *)completion);fprintf(stderr,"I/O error status: %s\n",spdk_nvme_cpl_get_status_string(&completion->status));fprintf(stderr,"Read I/O failed, aborting run\n");
sequence->is_completed =2;exit(1);}/*
* The read I/O has completed. Print the contents of the
* buffer, free the buffer, then mark the sequence as
* completed. This will trigger the hello_world() function
* to exit its polling loop.
*/printf("%d\n",strlen(sequence->buf));printf("%s", sequence->buf);spdk_free(sequence->buf);}staticvoidwrite_complete(void*arg,conststruct spdk_nvme_cpl *completion){struct hello_world_sequence *sequence = arg;struct ns_entry *ns_entry = sequence->ns_entry;int rc;/* See if an error occurred. If so, display information
* about it, and set completion value so that I/O
* caller is aware that an error occurred.
*/if(spdk_nvme_cpl_is_error(completion)){spdk_nvme_qpair_print_completion(sequence->ns_entry->qpair,(struct spdk_nvme_cpl *)completion);fprintf(stderr,"I/O error status: %s\n",spdk_nvme_cpl_get_status_string(&completion->status));fprintf(stderr,"Write I/O failed, aborting run\n");
sequence->is_completed =2;exit(1);}/*
* The write I/O has completed. Free the buffer associated with
* the write I/O and allocate a new zeroed buffer for reading
* the data back from the NVMe namespace.
*/if(sequence->using_cmb_io){spdk_nvme_ctrlr_unmap_cmb(ns_entry->ctrlr);}else{spdk_free(sequence->buf);}
sequence->buf =spdk_zmalloc(0x1000,0x1000,NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
rc =spdk_nvme_ns_cmd_read(ns_entry->ns, ns_entry->qpair, sequence->buf,2,/* LBA start 起始扇区 */1,/* number of LBAs 要是用的扇区个数 */
read_complete,(void*)sequence,0);if(rc !=0){fprintf(stderr,"starting read I/O failed\n");exit(1);}}staticvoidreset_zone_complete(void*arg,conststruct spdk_nvme_cpl *completion){struct hello_world_sequence *sequence = arg;/* Assume the I/O was successful */
sequence->is_completed =1;/* See if an error occurred. If so, display information
* about it, and set completion value so that I/O
* caller is aware that an error occurred.
*/if(spdk_nvme_cpl_is_error(completion)){//spdk_nvme_qpair_print_completion(sequence->ns_entry->qpair,(struct spdk_nvme_cpl *)completion);fprintf(stderr,"I/O error status: %s\n",spdk_nvme_cpl_get_status_string(&completion->status));fprintf(stderr,"Reset zone I/O failed, aborting run\n");
sequence->is_completed =2;exit(1);}}staticvoidreset_zone_and_wait_for_completion(struct hello_world_sequence *sequence){//重置操作提交到nvme的命名空间if(spdk_nvme_zns_reset_zone(sequence->ns_entry->ns, sequence->ns_entry->qpair,0,/* starting LBA of the zone to reset */
false,/* don't reset all zones */
reset_zone_complete,
sequence)){fprintf(stderr,"starting reset zone I/O failed\n");exit(1);}while(!sequence->is_completed){spdk_nvme_qpair_process_completions(sequence->ns_entry->qpair,0);}
sequence->is_completed =0;}staticvoidhello_world(void){struct ns_entry *ns_entry;struct hello_world_sequence sequence;int rc;
size_t sz;TAILQ_FOREACH(ns_entry,&g_namespaces, link){//遍历尾队列中的每一个元素/*
* Allocate an I/O qpair that we can use to submit read/write requests
* to namespaces on the controller. NVMe controllers typically support
* many qpairs per controller. Any I/O qpair allocated for a controller
* can submit I/O to any namespace on that controller.
*
* The SPDK NVMe driver provides no synchronization for qpair accesses -
* the application must ensure only a single thread submits I/O to a
* qpair, and that same thread must also check for completions on that
* qpair. This enables extremely efficient I/O processing by making all
* I/O operations completely lockless.
*///根据控制器信息为I/O申请提交/完成qpair队列
ns_entry->qpair =spdk_nvme_ctrlr_alloc_io_qpair(ns_entry->ctrlr,NULL,0);if(ns_entry->qpair ==NULL){printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n");return;}/*
* Use spdk_dma_zmalloc to allocate a 4KB zeroed buffer. This memory
* will be pinned, which is required for data buffers used for SPDK NVMe
* I/O operations.
*/
sequence.using_cmb_io =1;
sequence.buf =spdk_nvme_ctrlr_map_cmb(ns_entry->ctrlr,&sz);//从控制器中申请内存//如果ctrlr中的buf不够则从主机内存中去分配if(sequence.buf ==NULL|| sz <0x1000){
sequence.using_cmb_io =0;
sequence.buf =spdk_zmalloc(0x1000,0x1000,NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);}if(sequence.buf ==NULL){printf("ERROR: write buffer allocation failed\n");return;}if(sequence.using_cmb_io){printf("INFO: using controller memory buffer for IO\n");}else{printf("INFO: using host memory buffer for IO\n");}
sequence.is_completed =0;
sequence.ns_entry = ns_entry;/*
* If the namespace is a Zoned Namespace, rather than a regular
* NVM namespace, we need to reset the first zone, before we
* write to it. This not needed for regular NVM namespaces.
*///若是分区的namespace则需要在写入第一个区域之前对其进行重置if(spdk_nvme_ns_get_csi(ns_entry->ns)== SPDK_NVME_CSI_ZNS){//判断是否是分区的namespacereset_zone_and_wait_for_completion(&sequence);}/*
* Print "Hello world!" to sequence.buf. We will write this data to LBA
* 0 on the namespace, and then later read it back into a separate buffer
* to demonstrate the full I/O path.
*/int ns_nsize =spdk_nvme_ns_get_size(ns_entry->ns);//获取命名空间ns的最大sizeint ctrlr_max_size =spdk_nvme_ctrlr_get_max_xfer_size(ns_entry->ctrlr);//(128M)NVMe控制器的最大数据传输大小(以字节为单位)int sector_size =spdk_nvme_ns_get_sector_size(ns_entry->ns);//获取指定命名空间扇区大小char* str ="Hello world!\n";snprintf(sequence.buf,0x1000,"string:%s, ns_nsize:%d, ctrlr_max_size:%d ,sector_size:%d\n", str,ns_nsize,ctrlr_max_size,sector_size);//4K一个扇区大小printf("str_len:%d\n",strlen(sequence.buf));//snprintf(sequence.buf, 0x1000, "%s",sizeof(sequence.buf));/*
* Write the data buffer to LBA 0 of this namespace. "write_complete" and
* "&sequence" are specified as the completion callback function and
* argument respectively. write_complete() will be called with the
* value of &sequence as a parameter when the write I/O is completed.
* This allows users to potentially specify different completion
* callback routines for each I/O, as well as pass a unique handle
* as an argument so the application knows which I/O has completed.
*
* Note that the SPDK NVMe driver will only check for completions
* when the application calls spdk_nvme_qpair_process_completions().
* It is the responsibility of the application to trigger the polling
* process.
*/int n =1;struct timeval start,end;gettimeofday(&start,NULL);for(int i=0;i<n;i++){//功能:将写入的I/O提交到指定的NVMe命名空间
rc =spdk_nvme_ns_cmd_write(ns_entry->ns, ns_entry->qpair, sequence.buf,
i/10000,/* LBA start 扇区的起始位置 */3,/* number of LBAs 写入的扇区个数 */
write_complete,&sequence,0);//该命令将此i/o请求提交给上一步申请的qpair-----}gettimeofday(&end,NULL);double time =(end.tv_sec-start.tv_sec)+(end.tv_usec-start.tv_usec)*1.0/1000000;printf("time::%lf\niops::%lf\n",time,n/time);// rc = spdk_nvme_ns_cmd_write(ns_entry->ns, ns_entry->qpair, sequence.buf,// 0, /* LBA start */// 1, /* number of LBAs */// write_complete, &sequence, 0);if(rc !=0){fprintf(stderr,"starting write I/O failed\n");exit(1);}/*
* Poll for completions. 0 here means process all available completions.
* In certain usage models, the caller may specify a positive integer
* instead of 0 to signify the maximum number of completions it should
* process. This function will never block - if there are no
* completions pending on the specified qpair, it will return immediately.
*
* When the write I/O completes, write_complete() will submit a new I/O
* to read LBA 0 into a separate buffer, specifying read_complete() as its
* completion routine. When the read I/O completes, read_complete() will
* print the buffer contents and set sequence.is_completed = 1. That will
* break this loop and then exit the program.
*/while(!sequence.is_completed){//处理在队列对上提交的未完成的i/o的所有未完成的操作spdk_nvme_qpair_process_completions(ns_entry->qpair,0);//参数1:队列对,参数2:限制一次呼叫要处理的玩完成次数(0为无限制)}/*
* Free the I/O qpair. This typically is done when an application exits.
* But SPDK does support freeing and then reallocating qpairs during
* operation. It is the responsibility of the caller to ensure all
* pending I/O are completed before trying to free the qpair.
*/spdk_nvme_ctrlr_free_io_qpair(ns_entry->qpair);//释放申请的qpair}}static bool
probe_cb(void*cb_ctx,conststruct spdk_nvme_transport_id *trid,struct spdk_nvme_ctrlr_opts *opts){printf("Attaching to %s\n", trid->traddr);return true;}staticvoidattach_cb(void*cb_ctx,conststruct spdk_nvme_transport_id *trid,struct spdk_nvme_ctrlr *ctrlr,conststruct spdk_nvme_ctrlr_opts *opts){int nsid, num_ns;struct ctrlr_entry *entry;struct spdk_nvme_ns *ns;conststruct spdk_nvme_ctrlr_data *cdata;
entry =malloc(sizeof(struct ctrlr_entry));if(entry ==NULL){perror("ctrlr_entry malloc");exit(1);}printf("Attached to %s\n", trid->traddr);/*
* spdk_nvme_ctrlr is the logical abstraction in SPDK for an NVMe
* controller. During initialization, the IDENTIFY data for the
* controller is read using an NVMe admin command, and that data
* can be retrieved using spdk_nvme_ctrlr_get_data() to get
* detailed information on the controller. Refer to the NVMe
* specification for more details on IDENTIFY for NVMe controllers.
*/
cdata =spdk_nvme_ctrlr_get_data(ctrlr);//得到nvme控制器中的数据snprintf(entry->name,sizeof(entry->name),"%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
entry->ctrlr = ctrlr;TAILQ_INSERT_TAIL(&g_controllers, entry, link);/*
* Each controller has one or more namespaces. An NVMe namespace is basically
* equivalent to a SCSI LUN. The controller's IDENTIFY data tells us how
* many namespaces exist on the controller. For Intel(R) P3X00 controllers,
* it will just be one namespace.
*
* Note that in NVMe, namespace IDs start at 1, not 0.
*///每个ctrlr中的namespace的起始是从1开始的
num_ns =spdk_nvme_ctrlr_get_num_ns(ctrlr);//获取namespace的数量printf("Using controller %s with %d namespaces.\n", entry->name, num_ns);for(nsid =1; nsid <= num_ns; nsid++){
ns =spdk_nvme_ctrlr_get_ns(ctrlr, nsid);//获取相应的namespaceif(ns ==NULL){continue;}register_ns(ctrlr, ns);}}staticvoidcleanup(void){struct ns_entry *ns_entry,*tmp_ns_entry;struct ctrlr_entry *ctrlr_entry,*tmp_ctrlr_entry;struct spdk_nvme_detach_ctx *detach_ctx =NULL;TAILQ_FOREACH_SAFE(ns_entry,&g_namespaces, link, tmp_ns_entry){TAILQ_REMOVE(&g_namespaces, ns_entry, link);free(ns_entry);}TAILQ_FOREACH_SAFE(ctrlr_entry,&g_controllers, link, tmp_ctrlr_entry){TAILQ_REMOVE(&g_controllers, ctrlr_entry, link);spdk_nvme_detach_async(ctrlr_entry->ctrlr,&detach_ctx);free(ctrlr_entry);}while(detach_ctx &&spdk_nvme_detach_poll_async(detach_ctx)==-EAGAIN){;}}staticvoidusage(constchar*program_name){printf("%s [options]", program_name);printf("\n");printf("options:\n");printf(" -V enumerate VMD\n");}staticintparse_args(int argc,char**argv){int op;while((op =getopt(argc, argv,"V"))!=-1){switch(op){case'V':
g_vmd = true;break;default:usage(argv[0]);return1;}}return0;}intmain(int argc,char**argv){int rc;struct spdk_env_opts opts;
rc =parse_args(argc, argv);if(rc !=0){return rc;}/*
* SPDK relies on an abstraction around the local environment
* named env that handles memory allocation and PCI device operations.
* This library must be initialized first.
*
*/spdk_env_opts_init(&opts);//初始化操作
opts.name ="hello_world";
opts.shm_id =0;if(spdk_env_init(&opts)<0){//初始化环境fprintf(stderr,"Unable to initialize SPDK env\n");return1;}printf("Initializing NVMe Controllers\n");if(g_vmd &&spdk_vmd_init()){//vmd初始化卷管理设备fprintf(stderr,"Failed to initialize VMD."" Some NVMe devices can be unavailable.\n");}/*
* Start the SPDK NVMe enumeration process. probe_cb will be called
* for each NVMe controller found, giving our application a choice on
* whether to attach to each controller. attach_cb will then be
* called for each controller after the SPDK NVMe driver has completed
* initializing the controller we chose to attach.
*//*
单线程,根据需要将用户空间NVME驱动程序附加到找到的每个设备上,既是将找到的nvme控制器选择附加到用户空间驱动程序
*/
rc =spdk_nvme_probe(NULL,NULL, probe_cb, attach_cb,NULL);//probe_cb 为找到的每个NVME控制器if(rc !=0){fprintf(stderr,"spdk_nvme_probe() failed\n");cleanup();return1;}if(TAILQ_EMPTY(&g_controllers)){//判断控制器尾队列是否为空fprintf(stderr,"no NVMe controllers found\n");cleanup();return1;}printf("Initialization complete.\n");hello_world();cleanup();if(g_vmd){spdk_vmd_fini();}return0;}