DPDK用户态的中断处理

it2023-03-13  73

本文主要关注两个问题:中断如何从硬件上报到用户态,以及用户态如何开关中断。

本文基于hns3 PMD driver,选取DPDK的Rx interrupt特性,使用DPDK工程中的l3fwd-power测试程序来进行说明。

一. 简短概括:

熟悉DPDK的小伙伴可以直接看本章节如下总结部分,如果需要详细细节,可以进一步查看第二节。

1)用户态执行设备驱动的.dev_start函数时,为网卡的每个硬件队列创建一个eventfd;随后应用程序通过rte_eth_dev_rx_intr_ctl_q(内部调用epoll_ctl)把这些eventfd加入epoll进行监听;

2)当网卡硬件中断上报cpu,内核态vfio-pci驱动进行中断处理,在中断处理函数中调用eventfd_signal向用户态上报事件信息;

3)在用户态,当通过epoll_wait监听到有事件发生后,通过read从相应的eventfd读取相关事件,应用程序以此可以进一步做相关处理。

二. 详细处理:

以hns3 PMD driver为例,说明网卡PMD驱动部分处理:

驱动初始化及.dev_start函数:

(.dev_init钩子函数)hns3_dev_init -> hns3_init_pf

/*
 * Abridged excerpt of the hns3 PF init path (reached from the .dev_init hook
 * through hns3_dev_init). Only the interrupt-related part is shown:
 * registering hns3_interrupt_handler on the PCI device's interrupt handle and
 * then enabling the interrupt. Local declarations, the
 * err_intr_callback_register label and the return statement are not shown in
 * this excerpt.
 */
static int

hns3_init_pf(struct rte_eth_dev *eth_dev)

{

// Register the vector0 interrupt handler and enable the vector0 interrupt

hns3_clear_all_event_cause(hw); ret = rte_intr_callback_register(&pci_dev->intr_handle, hns3_interrupt_handler, eth_dev); if (ret) { PMD_INIT_LOG(ERR, "Failed to register intr: %d", ret); goto err_intr_callback_register; }

/* Enable interrupt */

rte_intr_enable(&pci_dev->intr_handle); hns3_pf_enable_irq0(hw);

}

.dev_start函数

(.dev_start钩子函数)hns3_dev_start -> hns3_map_rx_interrupt

static int

hns3_map_rx_interrupt(struct rte_eth_dev *dev)

{

/* disable uio/vfio intr/eventfd mapping */ rte_intr_disable(intr_handle); intr_vector = hw->used_rx_queues; /* creates event fd for each intr vector when MSIX is used */ if (rte_intr_efd_enable(intr_handle, intr_vector)) return -EINVAL; }

for (q_id = 0; q_id < hw->used_rx_queues; q_id++) {

ret = hns3_bind_ring_with_vector(hw, vec, true,,);

}

rte_intr_enable(intr_handle);

}

DPDK相关API实现:

/*
 * Abridged excerpt: for an interrupt handle of type RTE_INTR_HANDLE_VFIO_MSIX,
 * create one eventfd per requested vector and store it in
 * intr_handle->efds[]. The number of eventfds is nb_efd capped at
 * RTE_MAX_RXTX_INTR_VEC_ID. Declarations of i and fd, error handling and the
 * return statement are not shown in this excerpt.
 */
int

rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)

{

/* Never create more eventfds than RTE_MAX_RXTX_INTR_VEC_ID. */
uint32_t n = RTE_MIN(nb_efd,(uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);

/* Each eventfd is created with EFD_NONBLOCK and EFD_CLOEXEC. */
if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) { for (i = 0; i < n; i++) { fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); intr_handle->efds[i] = fd; }

}

}

rte_intr_enable -> vfio_enable_msix

/*
 * enable MSI-X interrupts
 *
 * Abridged excerpt (reached via rte_intr_enable): fills a struct vfio_irq_set
 * for VFIO_PCI_MSIX_IRQ_INDEX starting at vector 0, with eventfds as the data
 * payload, and submits it with the VFIO_DEVICE_SET_IRQS ioctl on
 * intr_handle->vfio_dev_fd. Local declarations and the handling of ret are
 * not shown in this excerpt.
 */

static int

vfio_enable_msix(const struct rte_intr_handle *intr_handle) {

/* count is 1 when max_intr is 0, otherwise max_intr capped at RTE_MAX_RXTX_INTR_VEC_ID + 1. */
irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ irq_set->count = intr_handle->max_intr ? (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |

VFIO_IRQ_SET_ACTION_TRIGGER;

/*
 * The fd array starts with intr_handle->fd at RTE_INTR_VEC_ZERO_OFFSET; the
 * nb_efd per-queue eventfds are copied in from RTE_INTR_VEC_RXTX_OFFSET on.
 */
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; irq_set->start = 0; fd_ptr = (int *) &irq_set->data; /* INTR vector offset 0 reserve for non-efds mapping */ fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, sizeof(*intr_handle->efds) * intr_handle->nb_efd); ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS,

irq_set);

}

l3fwd-power中的用法

收发包处理:

/*
 * main processing loop
 *
 * Abridged excerpt of the l3fwd-power per-lcore loop, showing only the
 * interrupt-driven path: the lcore's Rx queues are registered for events
 * once, then each iteration enables the Rx interrupts, sleeps until one
 * fires, disables them again and jumps back to start_rx.
 *
 * Local declarations and the packet receive/forward code are omitted
 * (marked <snip>).
 */
static int
main_loop(__rte_unused void *dummy)
{
	/* add into event wait list */
	if (event_register(qconf) == 0)
		intr_en = 1;
	<snip>
	while (1) {
start_rx:
		/* turn the NIC hardware Rx interrupts on */
		turn_on_off_intr(qconf, 1);
		sleep_until_rx_interrupt(qconf->n_rx_queue);
		/* turn the NIC hardware Rx interrupts off */
		turn_on_off_intr(qconf, 0);
		/* start receiving packets immediately */
		goto start_rx;
	}
}

/*
 * Abridged excerpt: register every Rx queue of this lcore on the per-thread
 * epoll instance (RTE_EPOLL_PER_THREAD) with RTE_INTR_EVENT_ADD via
 * rte_eth_dev_rx_intr_ctl_q(). The user data attached to each event packs
 * the port id shifted left by CHAR_BIT together with the queue id in the low
 * bits. Returns the first non-zero result of rte_eth_dev_rx_intr_ctl_q(), or
 * 0 once all queues are registered. Local declarations are not shown in this
 * excerpt.
 */
static int event_register(struct lcore_conf *qconf)

{

for (i = 0; i < qconf->n_rx_queue; ++i) { rx_queue = &(qconf->rx_queue_list[i]); portid = rx_queue->port_id; queueid = rx_queue->queue_id; data = portid << CHAR_BIT | queueid; ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid, RTE_EPOLL_PER_THREAD, RTE_INTR_EVENT_ADD, (void*)((uintptr_t)data)); if (ret) return ret; } return 0;

}

/*
 * Enable (on == true) or disable (on == false) the Rx interrupt of every
 * queue in qconf->rx_queue_list. Each enable/disable call is made while
 * holding the spinlock in locks[] indexed by the queue's port id.
 */
static void turn_on_off_intr(struct lcore_conf *qconf, bool on)
{
	int idx;

	for (idx = 0; idx < qconf->n_rx_queue; ++idx) {
		struct lcore_rx_queue *rxq = &qconf->rx_queue_list[idx];
		uint16_t port = rxq->port_id;
		uint8_t queue = rxq->queue_id;

		rte_spinlock_lock(&locks[port]);
		if (!on)
			rte_eth_dev_rx_intr_disable(port, queue);
		else
			rte_eth_dev_rx_intr_enable(port, queue);
		rte_spinlock_unlock(&locks[port]);
	}
}

/**
 * force polling thread sleep until one-shot rx interrupt triggers
 *
 * Abridged excerpt: calls rte_epoll_wait() on the per-thread epoll instance
 * (RTE_EPOLL_PER_THREAD) for up to num events with a timeout argument of -1.
 * For each returned event, the port id (bits above CHAR_BIT) and the queue id
 * (low CHAR_BIT bits) are decoded from the event's user data and the wake-up
 * is logged. The code shown always returns 0. Local declarations are omitted
 * (marked <snip>).
 */

static int

sleep_until_rx_interrupt(int num)

{

<snip> n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, -1); for (i = 0; i < n; i++) { data = event[i].epdata.data; port_id = ((uintptr_t)data) >> CHAR_BIT; queue_id = ((uintptr_t)data) & RTE_LEN2MASK(CHAR_BIT, uint8_t); RTE_LOG(INFO, L3FWD_POWER, "lcore %u is waked up from rx interrupt on" " port %d queue %d\n", rte_lcore_id(), port_id, queue_id); } return 0;

}

DPDK相关API实现:

rte_eth_dev_rx_intr_ctl_q -> rte_intr_rx_ctl

eal_intr_proc_rxtx_intr -> read

rte_epoll_ctl -> epoll_ctl

/*
 * Abridged excerpt showing only the RTE_INTR_EVENT_ADD case: the epoll event
 * slot intr_handle->elist[efd_idx] is filled in and the matching eventfd
 * intr_handle->efds[efd_idx] is added to epfd through rte_epoll_ctl().
 *
 * The event is registered with EPOLLIN | EPOLLPRI | EPOLLET, carries the
 * caller's data pointer, and has eal_intr_proc_rxtx_intr set as its callback
 * with intr_handle as the callback argument.
 *
 * Local declarations, the computation of efd_idx, the other op cases and the
 * return statement are omitted (marked <snip> where the original marked them).
 */
int
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
		int op, unsigned int vec, void *data)
{
	<snip>
	switch (op) {
	case RTE_INTR_EVENT_ADD:
		epfd_op = EPOLL_CTL_ADD;
		rev = &intr_handle->elist[efd_idx];
		<snip>
		/* attach to intr vector fd */
		epdata = &rev->epdata;
		epdata->event = EPOLLIN | EPOLLPRI | EPOLLET;
		epdata->data = data;
		epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
		epdata->cb_arg = (void *)intr_handle;
		rc = rte_epoll_ctl(epfd, epfd_op, intr_handle->efds[efd_idx], rev);
	}
}

三. 内核态vfio-pci处理

最新回复(0)