下面主要介绍MAC接收网络数据帧过程中使用的NAPI(New API)接口,并对其执行流程进行分析。
1、FEC MAC初始化流程中涉及的NAPI接口
//Driver init path: registers the NAPI poll callback for this net_device (excerpt).
static int fec_enet_init(struct net_device *ndev)
{
...
//Register the NAPI (New API) poll interface
netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi/*poll callback*/, NAPI_POLL_WEIGHT/*64*/);
...
}
//Open path: allows the NAPI instance to be scheduled from now on (excerpt).
static int fec_enet_open(struct net_device *ndev)
{
...
napi_enable(&fep->napi); //enable NAPI for this interface (clears the SCHED bit set at registration)
...
}
//Interrupt handler: masks device interrupts and hands RX/TX work off to NAPI polling (excerpt).
static irqreturn_t fec_enet_interrupt(int irq, void *dev_id)
{
...
if (napi_schedule_prep(&fep->napi)) { //check if napi can be scheduled
/* Mask device interrupts for the duration of the poll.
 * NOTE(review): the original comment said this "disables the MII interrupt",
 * but writing FEC_NAPI_IMASK to FEC_IMASK enables ONLY the bits contained in
 * that mask (typically the MII-transfer-done event) and masks everything
 * else, including RX/TX - confirm against the FEC register definitions. */
writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK);
__napi_schedule(&fep->napi); //schedule NAPI; the poll callback is fec_enet_rx_napi
}
...
}
/* NAPI poll callback: reap completed TX descriptors and receive up to
 * @budget RX frames. When the RX queue drains before the budget is
 * exhausted, leave polled mode and re-arm the device interrupts.
 * Returns the number of RX packets processed. */
static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
{
	struct net_device *ndev = napi->dev;
	struct fec_enet_private *fep = netdev_priv(ndev);
	int done;

	done = fec_enet_rx(ndev, budget);	/* receive path */
	fec_enet_tx(ndev);			/* transmit-completion path */

	/* Budget fully used: more work may be pending, stay on the poll list. */
	if (done >= budget)
		return done;

	/* Queue drained: tell the net core polling is finished, then unmask
	 * the interrupts that were disabled in the hard-irq handler. */
	napi_complete_done(napi, done);
	writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);

	return done;
}
调用路径:fec_enet_rx() 会针对每个接收队列调用 fec_enet_rx_queue():
//Per-queue RX processing, called from fec_enet_rx() for each receive queue (excerpt).
static int fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
{
...
//GRO (Generic Receive Offload) entry point
napi_gro_receive(&fep->napi, skb); //hand the received frame up to the TCP/IP stack
...
}
//Close path: stops NAPI before the interface is torn down (excerpt).
static int fec_enet_close(struct net_device *ndev)
{
...
napi_disable(&fep->napi); //waits for any in-flight poll to finish, then forbids further scheduling
...
}
2、NAPI(New API)核心流程分析
//Bit numbers for the napi_struct::state word (tested/set atomically below).
enum {
NAPI_STATE_SCHED, /* Poll is scheduled */
NAPI_STATE_MISSED, /* reschedule a napi */
NAPI_STATE_DISABLE, /* Disable pending */
NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
};
//Mask forms of the bit numbers above, for direct bitwise tests on the state word.
enum {
NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED),
NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED),
NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE),
NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC),
NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED),
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
};
//Initialise @napi with the given @poll callback and @weight and attach it to
//@dev. The instance starts with NAPI_STATE_SCHED set, so it cannot actually
//be scheduled until napi_enable() clears that bit.
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,int (*poll)(struct napi_struct *, int), int weight)
{
INIT_LIST_HEAD(&napi->poll_list);
hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
napi->timer.function = napi_watchdog;
napi->gro_count = 0;
napi->gro_list = NULL;
napi->skb = NULL;
napi->poll = poll;
//weights above NAPI_POLL_WEIGHT are accepted but flagged as bad practice
if (weight > NAPI_POLL_WEIGHT)
pr_err_once("netif_napi_add() called with weight %d on device %s\n",weight, dev->name);
napi->weight = weight;
list_add(&napi->dev_list, &dev->napi_list); //link this napi_struct onto net_device->napi_list
napi->dev = dev;
#ifdef CONFIG_NETPOLL
napi->poll_owner = -1;
#endif
set_bit(NAPI_STATE_SCHED, &napi->state); //start in "scheduled" state; napi_enable() releases it
napi_hash_add(napi);
}
/**
*napi_enable - enable NAPI scheduling
*@n: NAPI context
*
* Resume NAPI from being scheduled on this context.
* Must be paired with napi_disable.
*/
static inline void napi_enable(struct napi_struct *n)
{
BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); //test_bit returns true if the bit is set, false otherwise
//clear_bit() is atomic and may not be reordered.
//However, it does not contain a memory barrier, so if it is used for locking purposes,
//you should call smp_mb__before_atomic() and/or smp_mb__after_atomic() in order to ensure changes are visible on other processors.
smp_mb__before_atomic();
clear_bit(NAPI_STATE_SCHED, &n->state); //release the SCHED bit set by netif_napi_add(), making the instance schedulable
clear_bit(NAPI_STATE_NPSVC, &n->state);
}
/include/asm-generic/cmpxchg.h
/include/asm-generic/cmpxchg-local.h
cmpxchg(ptr, old, new):原子地比较*ptr与old,若二者相等则把new写入*ptr;无论是否发生交换,都返回*ptr原来的值(调用方通过返回值是否等于old来判断交换是否成功)。
//Generic fallback: plain cmpxchg() maps to the local (this-CPU-only) variant.
#define cmpxchg(ptr, o, n) cmpxchg_local((ptr), (o), (n))
//Dispatch on sizeof(*ptr) and cast the returned unsigned long back to the pointee type.
#define cmpxchg_local(ptr, o, n) ({ \
((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
(unsigned long)(n), sizeof(*(ptr)))); \
})
/*
 * Generic, interrupt-disabling fallback for __cmpxchg_local. Handles
 * 1/2/4/8-byte objects; values travel as unsigned long so a single
 * helper serves every supported width and architecture.
 */
static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,unsigned long old, unsigned long new, int size)
{
	unsigned long flags;
	unsigned long ret;

	/* Compile-time sanity: an 8-byte CAS needs an 8-byte unsigned long. */
	if (size == 8 && sizeof(unsigned long) != 8)
		wrong_size_cmpxchg(ptr);

	/* Disabling local interrupts makes the load/compare/store sequence
	 * atomic with respect to this CPU. */
	raw_local_irq_save(flags);
	switch (size) {
	case 1:
		ret = *(u8 *)ptr;
		if (ret == old)
			*(u8 *)ptr = (u8)new;
		break;
	case 2:
		ret = *(u16 *)ptr;
		if (ret == old)
			*(u16 *)ptr = (u16)new;
		break;
	case 4:
		ret = *(u32 *)ptr;
		if (ret == old)
			*(u32 *)ptr = (u32)new;
		break;
	case 8:
		ret = *(u64 *)ptr;
		if (ret == old)
			*(u64 *)ptr = (u64)new;
		break;
	default:
		wrong_size_cmpxchg(ptr);
	}
	raw_local_irq_restore(flags);

	/* Previous value at *ptr, whether or not the swap happened. */
	return ret;
}
/**
* napi_schedule_prep - check if napi can be scheduled
* @n: napi context
*
* Test if NAPI routine is already running, and if not mark
* it as running. This is used as a condition variable to
* ensure only one NAPI poll instance runs. We also make
* sure there is no pending NAPI disable.
*/
bool napi_schedule_prep(struct napi_struct *n)
{
unsigned long val, new;
do {
val = READ_ONCE(n->state); //lock-free snapshot of the state word; the cmpxchg below detects concurrent changes
if (unlikely(val & NAPIF_STATE_DISABLE)) //a pending napi_disable() forbids scheduling
return false;
new = val | NAPIF_STATE_SCHED; //SCHED is also set initially by netif_napi_add()
/* Sets STATE_MISSED bit if STATE_SCHED was already set
* This was suggested by Alexander Duyck, as compiler
* emits better code than :
* if (val & NAPIF_STATE_SCHED)
* new |= NAPIF_STATE_MISSED;
*/
new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED * NAPIF_STATE_MISSED;
//cmpxchg: if n->state still equals val, store new and return val;
//otherwise it returns the current n->state and we retry with a fresh snapshot.
} while (cmpxchg(&n->state, val, new) != val);
//net effect: NAPIF_STATE_SCHED is now set (MISSED too, if it was already scheduled)
return !(val & NAPIF_STATE_SCHED); //true only when this caller is the one that set SCHED
}
/**
* __napi_schedule - schedule for receive
* @n: entry to schedule
*
* The entry's receive function will be scheduled to run.
* Consider using __napi_schedule_irqoff() if hard irqs are masked.
*/
void __napi_schedule(struct napi_struct *n)
{
unsigned long flags;
local_irq_save(flags);
____napi_schedule(this_cpu_ptr(&softnet_data), n); //softnet_data is a per-CPU variable
local_irq_restore(flags);
}
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,struct napi_struct *napi)
{
list_add_tail(&napi->poll_list, &sd->poll_list); //queue this napi on the per-CPU softnet_data poll list
__raise_softirq_irqoff(NET_RX_SOFTIRQ); //raise NET_RX_SOFTIRQ: set its bit in __softirq_pending so the softirq (ksoftirqd if deferred) runs net_rx_action
}
以上梳理了NAPI的注册、使能与调度流程。下面继续分析:NAPI在软中断处理(net_rx_action)中如何通过poll回调,把接收到的以太网数据帧投递给TCP/IP协议栈。