【DPDK】DPDK中断机制学习(一)

该文详细介绍了DPDK中rte_eal_intr_init()函数的中断初始化过程,包括初始化中断源链表、创建pipe管道、启动处理线程以及中断事件的处理机制,特别是如何利用epoll监控中断事件并调用用户注册的回调函数。
摘要由CSDN通过智能技术生成

中断初始化流程

rte_eal_init()时进行中断初始化,rte_eal_intr_init()源码实现如下:

/**
 * Initialise the EAL interrupt subsystem.
 *
 * Resets the global interrupt source list, creates the pipe that is added
 * to the epoll wait list so the wait list can be told to rebuild itself, and
 * starts the "eal-intr-thread" control thread running
 * eal_intr_thread_main().
 *
 * @return
 *   0 on success.
 *   -1 if pipe() fails; rte_errno is set to errno.
 *   The non-zero result of rte_ctrl_thread_create() if the thread cannot be
 *   created; rte_errno is set to the negated result and the pipe is closed.
 */
int rte_eal_intr_init(void)
{
	int ret = 0;

	/* init the global interrupt source head */
	TAILQ_INIT(&intr_sources);

	/**
	 * create a pipe which will be waited by epoll and notified to
	 * rebuild the wait list of epoll.
	 */
	if (pipe(intr_pipe.pipefd) < 0) {
		rte_errno = errno;
		return -1;
	}

	/* create the host thread to wait/handle the interrupt */
	ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
			eal_intr_thread_main, NULL);
	if (ret != 0) {
		rte_errno = -ret;
		RTE_LOG(ERR, EAL,
			"Failed to create thread for interrupt handling\n");

		/*
		 * No thread will ever wait on the pipe, so release both ends
		 * instead of leaking two descriptors, and mark them invalid so
		 * stale descriptor numbers are never reused by mistake.
		 */
		close(intr_pipe.pipefd[0]);
		close(intr_pipe.pipefd[1]);
		intr_pipe.pipefd[0] = -1;
		intr_pipe.pipefd[1] = -1;
	}

	return ret;
}

中断初始化主要做了几件事情:

  1. 初始化intr_sources链表,所有注册的中断都会挂在这个链表上,链表结构体定义如下:
/*
 * Global list of interrupt sources. Every registered interrupt source is
 * linked on this list; it is (re)initialised by rte_eal_intr_init().
 */
static struct rte_intr_source_list intr_sources;

/* List head type for a tail queue of struct rte_intr_source entries. */
TAILQ_HEAD(rte_intr_source_list, rte_intr_source);

/**
 * One interrupt source: an interrupt handle together with the list of
 * user callbacks attached to it.
 */
struct rte_intr_source {
	TAILQ_ENTRY(rte_intr_source) next;  /**< linkage in intr_sources */
	struct rte_intr_handle intr_handle; /**< interrupt handle */
	struct rte_intr_cb_list callbacks;  /**< user callbacks */
	/* set to 1 while the interrupt thread handles an event for this
	 * source and set back to 0 once its callbacks have been run
	 */
	uint32_t active;
};

intr_handle:设备的中断句柄结构,保存了设备中断文件描述符

/**
 * Interrupt handle of a device.
 *
 * Holds the file descriptor the interrupt event is delivered on, the handle
 * type, and the per-vector event fd / epoll bookkeeping.
 */
struct rte_intr_handle {
	/* NOTE(review): RTE_STD_C11 is defined elsewhere; presumably it marks
	 * the anonymous union/struct below as a C11 construct - confirm.
	 */
	RTE_STD_C11
	/* the fd-based members and the opaque `handle` pointer share storage */
	union {
		struct {
			RTE_STD_C11
			/* only one of the two descriptors below is in use,
			 * they overlay each other
			 */
			union {
				/** VFIO device file descriptor */
				int vfio_dev_fd;
				/** UIO cfg file desc for uio_pci_generic */
				int uio_cfg_fd;
			};
			int fd;	/**< interrupt event file descriptor */
		};
		void *handle; /**< device driver handle (Windows) */
	};
	enum rte_intr_handle_type type;  /**< handle type */
	uint32_t max_intr;             /**< max interrupt requested */
	uint32_t nb_efd;               /**< number of available efd(event fd) */
	uint8_t efd_counter_size;      /**< size of efd counter, used for vdev */
	int efds[RTE_MAX_RXTX_INTR_VEC_ID];  /**< intr vectors/efds mapping */
	struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID];
				       /**< intr vector epoll event */
	int *intr_vec;                 /**< intr vector number array */
};

callbacks:用户注册的回调函数,可以注册多个回调函数,所以是list结构

  2. 使用pipe()创建管道,参数结构体定义如下:
/*
 * Pipe used to notify the interrupt thread: its read end is added to the
 * epoll wait list and its write end is written to request a rebuild of
 * that list.
 */
static union intr_pipefds intr_pipe;

/**
 * The two descriptors of the notification pipe, accessible either as the
 * array handed to pipe() or by name. Both views share the same storage, so
 * pipefd[0] is readfd and pipefd[1] is writefd.
 */
union intr_pipefds{
	struct {
		int pipefd[2]; /* array form, as filled in by pipe() */
	};
	struct {
		int readfd; // file descriptor of the read end
		int writefd; // file descriptor of the write end
	};
};

pipe管道的作用:后面创建的intr线程,会创建epoll实例,用来监听感兴趣的事件,即监听注册到epoll实例上的文件描述符,其中之一就是监听pipe的readfd。当中断链表有新增或者移除中断事件时,需要通过写writefd来通知epoll rebuild wait list。

  3. 创建intr线程,等待处理中断
// For reference, the system definition of struct epoll_event and of the
// user-data union it carries:

/* User data attached to a registered fd; being a union, only one member is
 * meaningful at a time. The interrupt code shown here uses the `fd` member.
 */
typedef union epoll_data {
   void    *ptr;
   int      fd;
   uint32_t u32;
   uint64_t u64;
} epoll_data_t;

/* One epoll registration / one reported event. */
struct epoll_event {
   uint32_t     events;    /* Epoll events */
   epoll_data_t data;      /* User data variable */
};

/**
 * Body of the interrupt handling thread; loops forever and never returns.
 *
 * Every pass creates a fresh epoll instance, registers the read end of
 * intr_pipe plus the fd of each interrupt source that has at least one
 * callback, and then hands the instance to eal_intr_handle_interrupts().
 * When that call returns, the epoll instance is closed and the next pass
 * builds a new one.
 *
 * @param arg
 *   Unused.
 */
static __rte_noreturn void *
eal_intr_thread_main(__rte_unused void *arg)
{
	/* the event mask never changes; data.fd is refreshed on each pass */
	static struct epoll_event pipe_event = {
		.events = EPOLLIN | EPOLLPRI,
	};

	/* host thread: endless loop, one pass per wait-list rebuild */
	while (1) {
		struct rte_intr_source *source;
		unsigned nb_watched = 0;
		int epfd;

		/* new epoll instance; epfd is used for every later epoll call */
		epfd = epoll_create(1);
		if (epfd < 0)
			rte_panic("Cannot create epoll instance\n");

		/* always watch the notification pipe's read end */
		pipe_event.data.fd = intr_pipe.readfd;
		if (epoll_ctl(epfd, EPOLL_CTL_ADD, intr_pipe.readfd,
				&pipe_event) < 0)
			rte_panic("Error adding fd to %d epoll_ctl, %s\n", intr_pipe.readfd, strerror(errno));
		nb_watched++;

		/* add the fd of every source that still has callbacks */
		rte_spinlock_lock(&intr_lock);
		TAILQ_FOREACH(source, &intr_sources, next) {
			struct epoll_event src_event;

			/* nothing to call for this source: do not watch it */
			if (TAILQ_EMPTY(&source->callbacks))
				continue;

			memset(&src_event, 0, sizeof(src_event));
			src_event.events =
				EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
			src_event.data.fd = source->intr_handle.fd;

			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
					source->intr_handle.fd,
					&src_event) >= 0) {
				nb_watched++;
				continue;
			}
			rte_panic("Error adding fd %d epoll_ctl, %s\n",
				source->intr_handle.fd, strerror(errno));
		}
		rte_spinlock_unlock(&intr_lock);

		/* serve events until the wait list has to be rebuilt */
		eal_intr_handle_interrupts(epfd, nb_watched);

		/*
		 * The epoll instance owns a file descriptor; close it before
		 * creating the next one so descriptors are not exhausted.
		 */
		close(epfd);
	}
}

接下来看一下中断处理函数做了什么, epoll_wait()等待事件发生,返回值为需要处理的事件数目
函数定义:int epoll_wait(int epfd,struct epoll_event * events,int maxevents,int timeout)

// pfd: epoll file descriptor
// totalfds: 添加到epoll的文件描述符数量

/**
 * Wait on an epoll instance and dispatch the events it reports.
 *
 * Loops until either epoll_wait() fails with an error other than EINTR or
 * eal_intr_process_interrupts() returns a negative value; in both cases
 * control goes back to the caller.
 *
 * @param pfd
 *   epoll file descriptor.
 * @param totalfds
 *   Number of file descriptors added to the epoll instance; used as the
 *   size of the event array and as maxevents for epoll_wait().
 */
static void
eal_intr_handle_interrupts(int pfd, unsigned totalfds)
{
	struct epoll_event events[totalfds];

	while (1) {
		int ready = epoll_wait(pfd, events, totalfds,
				EAL_INTR_EPOLL_WAIT_FOREVER);

		if (ready > 0) {
			/* a negative result asks for a wait-list rebuild */
			if (eal_intr_process_interrupts(events, ready) < 0)
				return;
			continue;
		}

		/* zero events means the wait timed out; just wait again */
		if (ready == 0)
			continue;

		/* epoll_wait failed: retry if merely interrupted by a signal */
		if (errno == EINTR)
			continue;

		RTE_LOG(ERR, EAL, "epoll_wait returns with fail\n");
		return;
	}
}

/**
 * Process a batch of events reported by epoll_wait().
 *
 * For each event: if it is on the notification pipe, drain the pipe and
 * return; otherwise look up the matching interrupt source, read from its fd
 * when the handle type requires it, run the source's callbacks, then free
 * callbacks flagged pending_delete and drop the source if none are left.
 *
 * @param events
 *   Array of events filled in by epoll_wait().
 * @param nfds
 *   Number of valid entries in events.
 * @return
 *   0 when all events were processed.
 *   -1 when the pipe fd was readable, or when reading a source fd failed
 *   with an error other than EINTR/EWOULDBLOCK (that source is removed).
 *   -EPIPE when callbacks were deleted but writing to the pipe failed.
 */
static int
eal_intr_process_interrupts(struct epoll_event *events, int nfds)
{
    /* NOTE(review): `call` is initialised once here and never reset inside
     * the loop below, so once one event sets it to true it stays true for
     * the remaining events of this batch - confirm this is intended.
     */
    bool call = false;
    int n, bytes_read, rv;
    struct rte_intr_source *src;
    struct rte_intr_callback *cb, *next;
    union rte_intr_read_buffer buf;
    struct rte_intr_callback active_cb;

    for (n = 0; n < nfds; n++) {
      /* event on the pipe fd: drain it and return -1 so that the epoll
       * wait list gets rebuilt
       */
    	if (events[n].data.fd == intr_pipe.readfd){
    		int r = read(intr_pipe.readfd, buf.charbuf, sizeof(buf.charbuf));
    		RTE_SET_USED(r);
    		return -1;
    	}

      /* walk the interrupt source list to find the source owning this fd */
    	rte_spinlock_lock(&intr_lock);
    	TAILQ_FOREACH(src, &intr_sources, next)
    		if (src->intr_handle.fd == events[n].data.fd)
    			break;

      /* no source owns this fd: skip the event */
      if (src == NULL){
    		rte_spinlock_unlock(&intr_lock);
    		continue;
    	}

    	/* mark the source as active, then release the lock */
    	src->active = 1;
    	rte_spinlock_unlock(&intr_lock);

    	/* pick the number of bytes to read according to the handle type;
    	 * 0 means nothing is read here and the callbacks are simply called
    	 */
    	switch (src->intr_handle.type) {
    	case RTE_INTR_HANDLE_UIO:
    	case RTE_INTR_HANDLE_UIO_INTX:
    		bytes_read = sizeof(buf.uio_intr_count);
    		break;
    	case RTE_INTR_HANDLE_ALARM:
    		bytes_read = sizeof(buf.timerfd_num);
    		break;
    #ifdef VFIO_PRESENT
    #ifdef HAVE_VFIO_DEV_REQ_INTERFACE
    	case RTE_INTR_HANDLE_VFIO_REQ:
    #endif
    	case RTE_INTR_HANDLE_VFIO_MSIX:
    	case RTE_INTR_HANDLE_VFIO_MSI:
    	case RTE_INTR_HANDLE_VFIO_LEGACY:
    		bytes_read = sizeof(buf.vfio_intr_count);
    		break;
    #endif
    	case RTE_INTR_HANDLE_VDEV:
    	case RTE_INTR_HANDLE_EXT:
    		bytes_read = 0;
    		call = true;
    		break;
    	case RTE_INTR_HANDLE_DEV_EVENT:
    		bytes_read = 0;
    		call = true;
    		break;
    	default:
    		bytes_read = 1;
    		break;
    	}

      if (bytes_read > 0)
      {
          /* from here on bytes_read holds the result of read() */
          bytes_read = read(events[n].data.fd, &buf, bytes_read);
          if (bytes_read < 0)
          {
              /* NOTE(review): this `continue` skips the code further down
               * that sets src->active back to 0, so the source stays
               * marked active - confirm this is intended.
               */
              if (errno == EINTR || errno == EWOULDBLOCK)
                  continue;
              RTE_LOG(ERR, EAL, "Error reading from file descriptor %d: %s\n", events[n].data.fd, strerror(errno));

              /* the device may have been unplugged or be faulty: remove
               * the source from the interrupt source list
               */
              rte_spinlock_lock(&intr_lock);
              TAILQ_REMOVE(&intr_sources, src, next);
              rte_spinlock_unlock(&intr_lock);

              /* free every callback still registered on the source; `next`
               * is fetched before the current entry is freed
               */
              for (cb = TAILQ_FIRST(&src->callbacks); cb; cb = next)
              {
                  next = TAILQ_NEXT(cb, next);
                  TAILQ_REMOVE(&src->callbacks, cb, next);
                  free(cb);
              }
              free(src);
              return -1; // force return -1 so the epoll wait list is rebuilt
          }
          else if (bytes_read == 0)
              RTE_LOG(ERR, EAL, "Read nothing from file descriptor %d\n", events[n].data.fd);
          else
              call = true;
      }

      /* take the lock again to call the callbacks and update the status */
      rte_spinlock_lock(&intr_lock);
      /* the event fired: call every registered callback */
      if (call)
      {
          TAILQ_FOREACH(cb, &src->callbacks, next)
          {
              /* make a copy and unlock. */
              active_cb = *cb;
              rte_spinlock_unlock(&intr_lock);

              /* run the user callback, without holding the lock */
              active_cb.cb_fn(active_cb.cb_arg);

              /*get the lock back. */
              rte_spinlock_lock(&intr_lock);
          }
      }
      /* we done with that interrupt source, release it. */
      src->active = 0;
      /* rv counts the callbacks deleted for this source */
      rv = 0;

      /* check if any callback are supposed to be removed */
      for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next)
      {
          next = TAILQ_NEXT(cb, next);
          if (cb->pending_delete)// flagged for deletion once the callbacks have been called
          {
              TAILQ_REMOVE(&src->callbacks, cb, next);
              if (cb->ucb_fn)/**< fn to call before cb is deleted */
                  cb->ucb_fn(&src->intr_handle, cb->cb_arg);
              free(cb);
              rv++;
          }
      }

      /* no callback is left on this source: remove it from the interrupt
       * source list and free it
       */
      if (TAILQ_EMPTY(&src->callbacks)) {
          TAILQ_REMOVE(&intr_sources, src, next);
          free(src);
      }

      /* if any callback was removed, write to the pipe's write fd so that
       * epoll_wait wakes up and the wait list is rebuilt
       */
      if (rv > 0 && write(intr_pipe.writefd, "1", 1) < 0) {
          rte_spinlock_unlock(&intr_lock);
          return -EPIPE;
      }

      rte_spinlock_unlock(&intr_lock);
    }

    return 0;
}

中断回调的结构体定义:

/**
 * One user callback registered on an interrupt source. Entries are linked
 * on the `callbacks` list of a struct rte_intr_source.
 */
struct rte_intr_callback {
	TAILQ_ENTRY(rte_intr_callback) next; /**< linkage in the callback list */
	rte_intr_callback_fn cb_fn;  /**< callback address */
	void *cb_arg;                /**< parameter for callback */
	uint8_t pending_delete;      /**< delete after callback is called */
	rte_intr_unregister_callback_fn ucb_fn; /**< fn to call before cb is deleted */
};
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值