首先来看下netback收包的过程,netback的收包是通过调用netif_be_start_xmit来进行的。在这之前,netfront要分配好page,通过GR交给netback,这样netback才可以把包的内容copy到这些GR指向的page中。每次netback能接收多少包是由netfront交给netback多少个GR指向的page决定的。
netfront是通过结构体struct xen_netif_rx_request把grant_ref_t交给netback的,id用于标识一对xen_netif_rx_request, xen_netif_rx_response。netfront把rx_request放入IO ring之后,ring上的req_prod就会增加,netif->rx.sring->req_prod - netif->rx_req_cons_peek中间的slots则大致表示了netfront可以接收的page个数。
netback要通过struct xen_netif_rx_response才能真正的把包返回给netfront,在这之前,GR的page里已经copy好了包的内容。这就要求netif->rx_req_cons_peek - netif->rx.rsp_prod_pvt中间的slot空间也要足够才行。
netbk_copy_skb函数用来做一次skb的全量copy。原skb的线性空间有可能是跨page的,copy结束后可以保证线性空间不跨page。而frags数组的内容也被copy到新的page中,然后挂到新的skb下面
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xen_netif *netif = netdev_priv(dev);
BUG_ON(skb->dev != dev);
/* Drop the packet if the target domain has no receive buffers. */
if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
goto drop;
/*
* XXX For now we also copy skbuffs whose head crosses a page
* boundary, because netbk_gop_skb can't handle them.
*/
if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
struct sk_buff *nskb = netbk_copy_skb(skb);
if ( unlikely(nskb == NULL) )
goto drop;
/* Copy only the header fields we use in this driver. */
nskb->dev = skb->dev;
nskb->ip_summed = skb->ip_summed;
dev_kfree_skb(skb);
skb = nskb;
}
netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
!!skb_shinfo(skb)->gso_size;
skb_shinfo(skb)->gso_size如果不为0,那么需要给netfront传递一个xen_netif_extra_info的rx_response。skb线性区域占一个xen_netif_rx_response,剩下的每个skb_frag_t占一个xen_netif_rx_response
netif_get(netif);
if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
netif->rx.sring->req_event = netif->rx_req_cons_peek +
netbk_max_required_rx_slots(netif);
mb(); /* request notification /then/ check & stop the queue */
if (netbk_queue_full(netif)) {
netif_stop_queue(dev);
/*
* Schedule 500ms timeout to restart the queue, thus
* ensuring that an inactive queue will be drained.
* Packets will be immediately be dropped until more
* receive buffers become available (see
* netbk_queue_full() check above).
*/
netif->tx_queue_timeout.data = (unsigned long)netif;
netif->tx_queue_timeout.function = tx_queue_callback;
mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
}
}
如果此时IO ring中 rx.req_prod - rx.req_cons 剩下的空间不足,tx_queue休眠500ms,这段时间内有可能会造成tx_queue丢包
skb_queue_tail(&rx_queue, skb);
tasklet_schedule(&net_rx_tasklet);
把skb追加到rx_queue末尾,触发net_rx_action软中断
return 0;
drop:
netif->stats.tx_dropped++;
dev_kfree_skb(skb);
return 0;
}
这个版本的netback通过copy的方式,把后端的skb的内容copy给前端的page(之前前端通过GR给了后端foreign access),这个过程涉及到了如下数据结构
// 每个分片对应一个xen_netif_rx_response的id
struct netbk_rx_meta {
skb_frag_t frag;
int id;
};
//入包的skb对应的操作集合,这里用到了meta, copy两个
struct netrx_pending_operations {
unsigned trans_prod, trans_cons;
unsigned mmu_prod, mmu_mcl;
unsigned mcl_prod, mcl_cons;
unsigned copy_prod, copy_cons;
unsigned meta_prod, meta_cons;
struct mmu_update *mmu;
struct gnttab_transfer *trans;
struct gnttab_copy *copy;
struct multicall_entry *mcl;
struct netbk_rx_meta *meta;
};
static struct pending_tx_info {
struct xen_netif_tx_request req;
struct xen_netif *netif;
} pending_tx_info[MAX_PENDING_REQS];
static u16 pending_ring[MAX_PENDING_REQS];
typedef unsigned int pending_ring_idx_t;
先来看netbk_gop_skb,该函数主要为skb准备好gnttab_copy, netbk_rx_meta结构体。
static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
int i, struct netrx_pending_operations *npo,
struct page *page, unsigned long size,
unsigned long offset)
{
struct gnttab_copy *copy_gop;
struct xen_netif_rx_request *req;
unsigned long old_mfn;
int idx = netif_page_index(page);
old_mfn = virt_to_mfn(page_address(page));
req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
copy_gop = npo->copy + npo->copy_prod++;
copy_gop->flags = GNTCOPY_dest_gref;
if (idx > -1) {
struct pending_tx_info *src_pend = &pending_tx_info[idx];
copy_gop->source.domid = src_pend->netif->domid;
copy_gop->source.u.ref = src_pend->req.gref;
copy_gop->flags |= GNTCOPY_source_gref;
} else {
copy_gop->source.domid = DOMID_SELF;
copy_gop->source.u.gmfn = old_mfn;
}
copy_gop->source.offset = offset;
copy_gop->dest.domid = netif->domid;
copy_gop->dest.offset = 0;
copy_gop->dest.u.ref = req->gref;
copy_gop->len = size;
对于接收的场景,此时idx = -1,所以copy_gop->source.domid 是dom0自己DOMID_SELF
copy_gop->dest.domid是guest domain,还记得backend_create_netif中,调用的netif_alloc么,此时传入的是前端的domid,所以xen_netif->domid就保存了前端的domain id
e.g. netif_alloc(&dev->dev, dev->otherend_id, handle);
所以这个gnttab_copy的语义为从源domain (dom0)向目的domain (netif->domid),拷贝size大小的数据,源page则为virt_to_mfn(page_address(page))计算出来的machine address,源offset为传入的offset参数,目的page (copy_gop->dest.u.ref) 为GR (xen_netif_rx_request->gref)指向的page,目的offset为0
return req->id;
}
static void netbk_gop_skb(struct sk_buff *skb,
struct netrx_pending_operations *npo)
{
struct xen_netif *netif = netdev_priv(skb->dev);
int nr_frags = skb_shinfo(skb)->nr_frags;
int i;
int extra;
struct netbk_rx_meta *head_meta, *meta;
head_meta = npo->meta + npo->meta_prod++;
head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
head_meta->frag.size = skb_shinfo(skb)->gso_size;
extra = !!head_meta->frag.size + 1;
head_meta由于代表了skb线性空间,所以其frag被拿来存放gso信息。如果gso_size不为0,extra为2,否则为1
for (i = 0; i < nr_frags; i++) {
meta = npo->meta + npo->meta_prod++;
meta->frag = skb_shinfo(skb)->frags[i];
meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
meta->frag.page,
meta->frag.size,
meta->frag.page_offset);
}
对于所有的nr_frags的page,调用netbk_gop_frag生成gnttab_copy
/*
* This must occur at the end to ensure that we don't trash skb_shinfo
* until we're done. We know that the head doesn't cross a page
* boundary because such packets get copied in netif_be_start_xmit.
*/
head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
virt_to_page(skb->data),
skb_headlen(skb),
offset_in_page(skb->data));
对于skb线性空间,也有一个对应的page要处理
netif->rx.req_cons += nr_frags + extra;
处理完毕,rx.req_cons增加相应值
}
这个版本的netback是通过tasklet软中断的方式来处理报文的,入包的软中断处理函数是net_rx_action
static void net_rx_action(unsigned long unused)
{
struct xen_netif *netif = NULL;
s8 status;
u16 id, irq, flags;
struct xen_netif_rx_response *resp;
struct multicall_entry *mcl;
struct sk_buff_head rxq;
struct sk_buff *skb;
int notify_nr = 0;
int ret;
int nr_frags;
int count;
unsigned long offset;
struct netrx_pending_operations npo = {
mmu: rx_mmu,
trans: grant_trans_op,
copy: grant_copy_op,
mcl: rx_mcl,
meta: meta};
skb_queue_head_init(&rxq);
count = 0;
while ((skb = skb_dequeue(&rx_queue)) != NULL) {
nr_frags = skb_shinfo(skb)->nr_frags;
*(int *)skb->cb = nr_frags;
netbk_gop_skb(skb, &npo);
count += nr_frags + 1;
__skb_queue_tail(&rxq, skb);
/* Filled the batch queue? */
if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
break;
}
把netif_be_start_xmit的skb queue中的skb取出来,调用netbk_gop_skb准备好netrx_pending_operations的meta, copy结构。之后调用hyper call完成GR的内容拷贝
完成后的skb被加到rxq的队列
while ((skb = __skb_dequeue(&rxq)) != NULL) {
nr_frags = *(int *)skb->cb;
netif = netdev_priv(skb->dev);
netif->stats.tx_bytes += skb->len;
netif->stats.tx_packets++;
status = netbk_check_gop(nr_frags, netif->domid, &npo);
netbk_check_gop用来检查gnttab_copy操作是否成功,用来查看是否返回GNTST_okay
id = meta[npo.meta_cons].id;
flags = nr_frags ? NETRXF_more_data : 0;
meta数组记录了skb中每个frag对应page的信息,如果skb的nr_frags不为空,设置gso标志
if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
flags |= NETRXF_csum_blank | NETRXF_data_validated;
else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
/* remote but checksummed. */
flags |= NETRXF_data_validated;
offset = 0;
resp = make_rx_response(netif, id, status, offset,
skb_headlen(skb), flags);
构造xen_netif_rx_response并放到IO ring中,这里对应的是skb线性空间的page
if (meta[npo.meta_cons].frag.size) {
struct xen_netif_extra_info *gso =
(struct xen_netif_extra_info *)
RING_GET_RESPONSE(&netif->rx,
netif->rx.rsp_prod_pvt++);
resp->flags |= NETRXF_extra_info;
gso->u.gso.size = meta[npo.meta_cons].frag.size;
gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
gso->u.gso.pad = 0;
gso->u.gso.features = 0;
gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
gso->flags = 0;
}
如果frag size不为0,那么构造一个xen_netif_extra_info类型的xen_netif_rx_response,并加入到IO ring中。
netbk_add_frag_responses(netif, status,
meta + npo.meta_cons + 1,
nr_frags);
对于所有的frags,调用make_rx_response生成xen_netif_rx_response并放入到IO ring中。
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
irq = netif->irq;
if (ret && !rx_notify[irq] &&
(netif->smart_poll != 1)) {
rx_notify[irq] = 1;
notify_list[notify_nr++] = irq;
}
if (netif_queue_stopped(netif->dev) &&
netif_schedulable(netif) &&
!netbk_queue_full(netif))
netif_wake_queue(netif->dev);
/*
* netfront_smartpoll_active indicates whether
* netfront timer is active.
*/
if ((netif->smart_poll == 1)) {
if (!(netif->rx.sring->private.netif.smartpoll_active)) {
notify_remote_via_irq(irq);
netif->rx.sring->private.netif.smartpoll_active = 1;
}
}
netif_put(netif);
dev_kfree_skb(skb);
skb包已经被拷贝到前端,可以释放skb了
npo.meta_cons += nr_frags + 1;
meta_cons增加处理过的相应page个数
}
while (notify_nr != 0) {
irq = notify_list[--notify_nr];
rx_notify[irq] = 0;
notify_remote_via_irq(irq);
}
/* More work to do? */
if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
tasklet_schedule(&net_rx_tasklet);
}