原文出处:
http://blog.chinaunix.net/uid-23629988-id-371188.html
http://blog.chinaunix.net/uid-23629988-id-439718.html
http://blog.chinaunix.net/uid-23629988-id-1619346.html
/*
 * ip_output - IPv4 output entry point after routing.
 *
 * Updates per-namespace OUT statistics, stamps the skb with the egress
 * device and the IPv4 protocol id, then runs the NF_INET_POST_ROUTING
 * netfilter hook before handing the packet to ip_finish_output().
 * The hook is skipped when the packet was rerouted (IPSKB_REROUTED).
 */
int ip_output(struct sk_buff *skb)
{
	/* Egress device, taken from the skb's routing (dst) entry. */
	struct net_device *dev = skb_dst(skb)->dev;

	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}
staticintip_finish_output(struct sk_buff*skb)
{
#ifdefined(CONFIG_NETFILTER)&&defined(CONFIG_XFRM)
/*Policy lookup after SNAT yielded a new policy*/
if(skb_dst(skb)->xfrm!=NULL){
IPCB(skb)->flags|=IPSKB_REROUTED;
return dst_output(skb);
}
#endif
/* 处理需要IP分片的情况 */
if(skb->len>ip_skb_dst_mtu(skb)&&!skb_is_gso(skb))
return ip_fragment(skb,ip_finish_output2);
/* 不需要IP分片, 我们就看这种一般情况 */
else
return ip_finish_output2(skb);
}
static inlineintip_finish_output2(struct sk_buff*skb)
{
struct dst_entry*dst=skb_dst(skb);
struct rtable*rt=(struct rtable*)dst;
struct net_device*dev=dst->dev;
unsignedinthh_len=LL_RESERVED_SPACE(dev);
if(rt->rt_type==RTN_MULTICAST){
IP_UPD_PO_STATS(dev_net(dev),IPSTATS_MIB_OUTMCAST,skb->len);
}elseif(rt->rt_type==RTN_BROADCAST)
IP_UPD_PO_STATS(dev_net(dev),IPSTATS_MIB_OUTBCAST,skb->len);
/*Be paranoid,rather than too clever.*/
if(unlikely(skb_headroom(skb)<hh_len&&dev->header_ops)){
/*
skb的首部空间不足,无法保存l2层的硬件地址。
这时,需要重新分配一个更大bufer。
*/
struct sk_buff*skb2;
skb2=skb_realloc_headroom(skb,LL_RESERVED_SPACE(dev));
if(skb2==NULL){
kfree_skb(skb);
return-ENOMEM;
}
if(skb->sk)
skb_set_owner_w(skb2,skb->sk);
kfree_skb(skb);
skb=skb2;
}
if(dst->hh)
return neigh_hh_output(dst->hh,skb);
/*
dst没有L2层地址的cache,需要调用neighbour子系统的output进行发送。
*/
elseif(dst->neighbour)
return dst->neighbour->output(skb);
if(net_ratelimit())
printk(KERN_DEBUG"ip_finish_output2: No header cache and no neighbour!\n");
kfree_skb(skb);
return-EINVAL;
}
/*
 * neigh_resolve_output - transmit an skb once its neighbour (L2
 * address) is resolved.
 *
 * When neigh_event_send() reports that no solicitation is needed, the
 * hardware header is built from neigh->ha — initialising the dst's hh
 * cache under the write lock when the device supports header caching —
 * and the packet is handed to neigh->ops->queue_xmit().  Otherwise
 * neigh_event_send() has queued the skb pending resolution (rc = 0).
 * A missing dst/neighbour discards the skb with -EINVAL.
 */
int neigh_resolve_output(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct neighbour *neigh;
	int rc = 0;

	if (!dst || !(neigh = dst->neighbour))
		goto discard;

	__skb_pull(skb, skb_network_offset(skb));

	if (!neigh_event_send(neigh, skb)) {
		/* No solicitation needed: the L2 address is available. */
		int err;
		struct net_device *dev = neigh->dev;

		if (dev->header_ops->cache && !dst->hh) {
			write_lock_bh(&neigh->lock);
			/* Re-check under the lock before initialising hh. */
			if (!dst->hh)
				neigh_hh_init(neigh, dst, dst->ops->protocol);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
			write_unlock_bh(&neigh->lock);
		} else {
			read_lock_bh(&neigh->lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
			read_unlock_bh(&neigh->lock);
		}
		if (err >= 0)
			rc = neigh->ops->queue_xmit(skb); /* hand to the device layer */
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
		      dst, dst ? dst->neighbour : NULL);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
/*
 * NOTE(review): fragment excerpted without its enclosing function —
 * it appears to be the arp_queue flush loop run once a neighbour entry
 * becomes NUD_VALID (drains queued skbs and transmits them); confirm
 * against the original kernel source (net/core/neighbour.c).
 */
while(neigh->nud_state & NUD_VALID &&
(skb = __skb_dequeue(&neigh->arp_queue))!=NULL){
struct neighbour *n1 = neigh;
/* Release neigh->lock while the skb is handed to the output path. */
write_unlock_bh(&neigh->lock);
/* On shaper/eql skb->dst->neighbour != neigh :( */
if(skb_dst(skb)&& skb_dst(skb)->neighbour)
n1 = skb_dst(skb)->neighbour;
n1->output(skb);
/* Re-take the lock before re-testing the loop condition. */
write_lock_bh(&neigh->lock);
}
/*
 * dev_queue_xmit - queue an skb for transmission on its device.
 *
 * Picks a TX queue, dereferences its qdisc under rcu_read_lock_bh()
 * (which also disables softirqs/preemption), and enqueues through
 * __dev_xmit_skb() when the qdisc has an enqueue method.
 *
 * NOTE(review): this is a truncated excerpt from the original article;
 * the queueless-device path and the "out:" label targeted by the goto
 * below were elided, so this excerpt does not compile as-is.
 */
int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	struct Qdisc *q;
	int rc = -ENOMEM;

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	txq = dev_pick_tx(dev, skb);
	q = rcu_dereference_bh(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Ordinary devices with a queueing discipline take this path. */
		rc = __dev_xmit_skb(skb, q, dev, txq);
		goto out; /* "out:" label elided in this excerpt */
	}
	/* ... remainder of the function elided in the original article ... */
}
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev,
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
bool contended = qdisc_is_running(q);
int rc;
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying toget qdisc main lock.
* This permits __QDISC_STATE_RUNNING owner toget the lock more often
*and dequeue packets faster.
*/
if(unlikely(contended))
spin_lock(&q->busylock);
spin_lock(root_lock);
if(unlikely(test_bit(__QDISC_STATE_DEACTIVATED,&q->state))){
/* 该quque的状态为非活动的,drop该数据包 */
kfree_skb(skb);
rc = NET_XMIT_DROP;
}elseif((q->flags & TCQ_F_CAN_BYPASS)&&!qdisc_qlen(q)&&
qdisc_run_begin(q)){
/*
* This is a work-conserving queue; there are no old skbs
* waiting to be sent out;and the qdisc isnot running -
* xmit the skb directly.
*/
if(!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
skb_dst_force(skb);
__qdisc_update_bstats(q, skb->len);
if(sch_direct_xmit(skb, q, dev, txq, root_lock)){
if(unlikely(contended)){
spin_unlock(&q->busylock);
contended =false;
}
__qdisc_run(q);
}else
qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
}else{
skb_dst_force(skb);
/* 将数据包加入到queue中 */
rc = qdisc_enqueue_root(skb, q);
if(qdisc_run_begin(q)){
if(unlikely(contended)){
spin_unlock(&q->busylock);
contended =false;
}
__qdisc_run(q);
}
}
spin_unlock(root_lock);
if(unlikely(contended))
spin_unlock(&q->busylock);
return rc;
}
请看dev_activate,用于激活网卡。
void dev_activate(struct net_device *dev)
{
int need_watchdog;
/* No queueing discipline is attached to device;
create default one i.e. pfifo_fast for devices,
which need queueing and noqueue_qdisc for
virtual interfaces
*/
当没有指定queueing discipline时,就使用默认的discipline
*/
if(dev->qdisc ==&noop_qdisc)
attach_default_qdiscs(dev);
...... ......
}
/*
 * Qdisc operations table for "pfifo_fast", the queueing discipline
 * attached by attach_default_qdiscs() (see dev_activate above).
 * priv_size reserves per-qdisc space for struct pfifo_fast_priv.
 */
struct Qdisc_ops pfifo_fast_ops __read_mostly ={
.id ="pfifo_fast",
.priv_size = sizeof(struct pfifo_fast_priv),
.enqueue = pfifo_fast_enqueue,
.dequeue = pfifo_fast_dequeue,
.peek = pfifo_fast_peek,
.init = pfifo_fast_init,
.reset = pfifo_fast_reset,
.dump = pfifo_fast_dump,
.owner = THIS_MODULE,
};
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
{
if(skb_queue_len(&qdisc->q)< qdisc_dev(qdisc)->tx_queue_len){
int band = prio2band[skb->priority & TC_PRIO_MAX];
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
struct sk_buff_head *list = band2list(priv, band);
priv->bitmap |=(1 << band);
qdisc->q.qlen++;
return __qdisc_enqueue_tail(skb, qdisc, list);
}
return qdisc_drop(skb, qdisc);
}
然后我们还需要回到__dev_xmit_skb中:在将数据包加入到队列之后,要保证qdisc处于运行态。
/*
 * NOTE(review): excerpt repeated from __dev_xmit_skb above — after the
 * skb is enqueued, the qdisc is started if it was not already running.
 */
rc = qdisc_enqueue_root(skb, q);
if(qdisc_run_begin(q)){
if(unlikely(contended)){
spin_unlock(&q->busylock);
contended =false;
}
__qdisc_run(q);
}
/*
 * __qdisc_run - drain the qdisc until it has no more work or the
 * quantum expires.
 *
 * qdisc_restart() transmits packets and returns non-zero while more
 * work remains; we loop on it, but bail out and reschedule via the TX
 * softirq when another task needs the CPU or at least one jiffy has
 * elapsed.  qdisc_run_end() clears the running state on exit.
 */
void __qdisc_run(struct Qdisc *q)
{
	unsigned long start = jiffies;

	for (;;) {
		if (!qdisc_restart(q))
			break;
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long (>= 1 jiffy).
		 */
		if (need_resched() || jiffies != start) {
			/* Defer the rest of the work to the softirq. */
			__netif_schedule(q);
			break;
		}
	}

	qdisc_run_end(q);
}
/*
 * sch_direct_xmit - transmit one skb to the driver outside the qdisc
 * root lock.
 *
 * Drops root_lock while holding the device TX lock, calls
 * dev_hard_start_xmit() if the queue is neither stopped nor frozen,
 * then re-takes root_lock.  Returns the qdisc's remaining queue length
 * on successful transmission, the result of the collision/requeue
 * handlers otherwise, and 0 when the TX queue is stopped or frozen.
 */
int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
		    struct net_device *dev, struct netdev_queue *txq,
		    spinlock_t *root_lock)
{
	int ret = NETDEV_TX_BUSY;

	/* And release qdisc */
	spin_unlock(root_lock);

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	/* Transmit only if the TX queue is neither stopped nor frozen. */
	if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
		ret = dev_hard_start_xmit(skb, dev, txq); /* push to the driver */
	HARD_TX_UNLOCK(dev, txq);

	spin_lock(root_lock);

	if (dev_xmit_complete(ret)) {
		/* Driver sent out skb successfully or skb was consumed:
		 * report the qdisc's new queue length. */
		ret = qdisc_qlen(q);
	} else if (ret == NETDEV_TX_LOCKED) {
		/* Driver try-lock failed (TX lock collision). */
		ret = handle_dev_cpu_collision(skb, txq, q);
	} else {
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);
		ret = dev_requeue_skb(skb, q);
	}

	if (ret && (netif_tx_queue_stopped(txq) ||
		    netif_tx_queue_frozen(txq)))
		ret = 0;

	return ret;
}
/*
 * dev_hard_start_xmit - hand an skb (or its GSO segment list) to the
 * driver's ndo_start_xmit.
 *
 * The common path (skb->next == NULL) delivers a copy to protocol taps,
 * optionally drops the dst, performs GSO segmentation or linearization
 * plus software checksum completion as needed, then calls the driver.
 * The "gso:" path transmits each segment in turn, re-linking the list
 * head so the caller can requeue on NETDEV_TX_BUSY.
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
const struct net_device_ops *ops = dev->netdev_ops;
int rc = NETDEV_TX_OK;
if(likely(!skb->next)){
/* Deliver a copy to all-protocol taps (e.g. tcpdump). */
if(!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
/*
 * If device doesn't need skb->dst, release it right now while
 * it's hot in this cpu's cache.
 */
if(dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(skb);
skb_orphan_try(skb);
if(netif_needs_gso(dev, skb)){
/* GSO segmentation is required for this device/skb. */
if(unlikely(dev_gso_segment(skb)))
goto out_kfree_skb;
if(skb->next)
goto gso;
}else{
/* No GSO: linearize if the driver cannot handle frags. */
if(skb_needs_linearize(skb, dev)&&
__skb_linearize(skb))
goto out_kfree_skb;
/* If packet is not checksummed and device does not
 * support checksumming for this protocol, complete
 * checksumming here.
 */
if(skb->ip_summed == CHECKSUM_PARTIAL){
/* Compute the deferred checksum in software. */
skb_set_transport_header(skb, skb->csum_start -
skb_headroom(skb));
if(!dev_can_checksum(dev, skb)&&
skb_checksum_help(skb))
goto out_kfree_skb;
}
}
rc = ops->ndo_start_xmit(skb, dev);
if(rc == NETDEV_TX_OK)
txq_trans_update(txq);
return rc;
}
gso:
/* Transmit each segment of the GSO list in turn. */
do{
struct sk_buff *nskb = skb->next;
skb->next= nskb->next;
nskb->next=NULL;
/*
 * If device doesn't need nskb->dst, release it right now while
 * it's hot in this cpu's cache.
 */
if(dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(nskb);
rc = ops->ndo_start_xmit(nskb, dev);
if(unlikely(rc != NETDEV_TX_OK)){
if(rc &~NETDEV_TX_MASK)
goto out_kfree_gso_skb;
/* Driver busy: put the segment back at the list head. */
nskb->next= skb->next;
skb->next= nskb;
return rc;
}
txq_trans_update(txq);
if(unlikely(netif_tx_queue_stopped(txq)&& skb->next))
return NETDEV_TX_BUSY;
}while(skb->next);
out_kfree_gso_skb:
if(likely(skb->next==NULL))
skb->destructor = DEV_GSO_CB(skb)->destructor;
out_kfree_skb:
kfree_skb(skb);
return rc;
}
/*
 * net_device_ops table for the e1000 driver: .ndo_start_xmit
 * (e1000_xmit_frame) is the hook invoked via ops->ndo_start_xmit()
 * in dev_hard_start_xmit() above.
 */
static const struct net_device_ops e1000_netdev_ops ={
.ndo_open = e1000_open,
.ndo_stop = e1000_close,
.ndo_start_xmit = e1000_xmit_frame,
.ndo_get_stats = e1000_get_stats,
.ndo_set_rx_mode = e1000_set_rx_mode,
.ndo_set_mac_address = e1000_set_mac,
.ndo_tx_timeout = e1000_tx_timeout,
.ndo_change_mtu = e1000_change_mtu,
.ndo_do_ioctl = e1000_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_vlan_rx_register = e1000_vlan_rx_register,
.ndo_vlan_rx_add_vid = e1000_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = e1000_vlan_rx_kill_vid,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = e1000_netpoll,
#endif
};
转载于:https://blog.51cto.com/lvzg2005/1240867