This section analyzes the TCP/IP data transmission path.
1 Data transmission
Userspace normally sends data through UDP and TCP sockets. After the packet has passed through the routing and neighbour subsystems, the kernel eventually calls dev_queue_xmit() to hand it to the device layer.
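In normal operation dev_queue_xmit() is reached from the IP layer (for example ip_finish_output2() via the neighbour code), but it can also be called directly. The following is a minimal, hypothetical kernel-module sketch, only meant to show what dev_queue_xmit() expects from its caller: an skb with skb->dev set and a complete link-layer header. The helper name send_raw_frame() is made up for illustration.

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/string.h>

/* Hypothetical helper: build a broadcast Ethernet frame and hand it to
 * dev_queue_xmit(), the entry point analysed below. */
static int send_raw_frame(struct net_device *dev, const void *payload, unsigned int len)
{
    struct sk_buff *skb;

    skb = alloc_skb(LL_RESERVED_SPACE(dev) + len, GFP_ATOMIC);
    if (!skb)
        return -ENOMEM;

    skb_reserve(skb, LL_RESERVED_SPACE(dev)); /* room for the Ethernet header */
    skb_reset_network_header(skb);
    memcpy(skb_put(skb, len), payload, len);  /* copy the payload */

    skb->dev = dev;                           /* dev_queue_xmit() reads skb->dev */
    skb->protocol = htons(ETH_P_IP);          /* assumption: IPv4 payload */

    /* prepend the link-layer header (what the neighbour code normally does) */
    if (dev_hard_header(skb, dev, ETH_P_IP, dev->broadcast,
                        dev->dev_addr, skb->len) < 0) {
        kfree_skb(skb);
        return -EINVAL;
    }

    return dev_queue_xmit(skb); /* enqueue on the device's qdisc and transmit */
}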
1.1 The dev_queue_xmit function
int dev_queue_xmit(struct sk_buff *skb)
{
    struct net_device *dev = skb->dev;
    struct netdev_queue *txq;
    struct Qdisc *q;
    int rc = -ENOMEM;

    skb_reset_mac_header(skb);

    rcu_read_lock_bh();
    skb_update_prio(skb);
    /* select the transmit queue for this skb (socket cache, XPS mapping or flow hash) */
    txq = netdev_pick_tx(dev, skb);
    /* the queueing discipline (traffic-control rules) attached to that queue */
    q = rcu_dereference_bh(txq->qdisc);

    trace_net_dev_queue(skb);
    /* for the default pfifo_fast qdisc, enqueue is set to pfifo_fast_enqueue at init time */
    if (q->enqueue) {
        rc = __dev_xmit_skb(skb, q, dev, txq);
        goto out;
    }
    /* ... devices without a queue (e.g. loopback) are handled here ... */
out:
    rcu_read_unlock_bh();
    return rc;
}
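netdev_pick_tx() selects the transmit queue. In this kernel generation it prefers a queue index cached on the socket, then an XPS mapping, and finally a flow hash. A condensed sketch of that logic (simplified from __netdev_pick_tx() in net/core/dev.c; XPS handling is omitted and helper details vary between versions):

/* Simplified sketch of __netdev_pick_tx(); XPS handling omitted. */
static u16 pick_tx_queue_sketch(struct net_device *dev, struct sk_buff *skb)
{
    struct sock *sk = skb->sk;
    int queue_index = sk ? sk_tx_queue_get(sk) : -1;

    if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
        /* no valid cached queue: spread flows over the tx queues by hash */
        queue_index = skb_tx_hash(dev, skb);

        /* cache the choice on connected sockets so later packets of the
         * same flow stay on the same queue */
        if (sk && rcu_access_pointer(sk->sk_dst_cache))
            sk_tx_queue_set(sk, queue_index);
    }

    return queue_index;
}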
1.2 The __dev_xmit_skb function
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                                 struct net_device *dev,
                                 struct netdev_queue *txq)
{
    spinlock_t *root_lock = qdisc_lock(q);
    bool contended;
    int rc;

    /* compute the packet length carried by the skb */
    qdisc_pkt_len_init(skb);
    qdisc_calculate_pkt_len(skb, q);
    /* is the qdisc already running, i.e. is another CPU currently dequeuing? */
    contended = qdisc_is_running(q);
    if (unlikely(contended))
        spin_lock(&q->busylock);

    spin_lock(root_lock);
    if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
        printk(KERN_WARNING "[mtk_net]__dev_xmit_skb drop skb_len = %d \n", skb->len);
        kfree_skb(skb);
        rc = NET_XMIT_DROP;
    } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
               qdisc_run_begin(q)) {
        /* Transmit the packet directly, bypassing the queue. Three conditions
         * must hold:
         * 1. the qdisc (pfifo_fast) has the TCQ_F_CAN_BYPASS flag set;
         * 2. qdisc_qlen() is 0, i.e. no packets are already waiting in the queue;
         * 3. qdisc_run_begin(q) returns true, i.e. no other CPU is currently
         *    running this qdisc's transmit path.
         */
        if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
            skb_dst_force(skb);
        /* update the qdisc byte/packet statistics */
        qdisc_bstats_update(q, skb);
        /* send the skb directly via sch_direct_xmit(); a non-zero return means
         * more packets are now queued and the qdisc must keep running */
        if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
            if (unlikely(contended)) {
                spin_unlock(&q->busylock);
                contended = false;
            }
            __qdisc_run(q); /* drain the packets that were queued meanwhile */
        } else
            qdisc_run_end(q); /* clear the running flag */

        rc = NET_XMIT_SUCCESS;
    } else {
        skb_dst_force(skb);
        /* Reaching this branch means the queue is non-empty or another CPU is
         * already transmitting (sch_direct_xmit() temporarily drops root_lock),
         * so the skb can only be enqueued via q->enqueue() and sent later. */
        rc = q->enqueue(skb, q) & NET_XMIT_MASK;
        if (qdisc_run_begin(q)) {
            if (unlikely(contended)) {
                spin_unlock(&q->busylock);
                contended = false;
            }
            __qdisc_run(q);
        }
    }
    spin_unlock(root_lock);
    if (unlikely(contended))
        spin_unlock(&q->busylock);
    return rc;
}
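Both the bypass branch and the enqueue branch hinge on qdisc_is_running()/qdisc_run_begin()/qdisc_run_end(), which serialize dequeuing so that only one CPU runs a given qdisc at a time. In this kernel generation the helpers in include/net/sch_generic.h look roughly like this:

static inline bool qdisc_is_running(const struct Qdisc *qdisc)
{
    return (qdisc->__state & __QDISC___STATE_RUNNING) ? true : false;
}

/* returns true only for the CPU that actually claims the qdisc */
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
    if (qdisc_is_running(qdisc))
        return false;
    qdisc->__state |= __QDISC___STATE_RUNNING;
    return true;
}

static inline void qdisc_run_end(struct Qdisc *qdisc)
{
    qdisc->__state &= ~__QDISC___STATE_RUNNING;
}

The __state updates are always made under root_lock (q->busylock only reduces contention on that lock), which is why plain, non-atomic operations are sufficient here.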
1.3 The sch_direct_xmit function
int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
                    struct net_device *dev, struct netdev_queue *txq,
                    spinlock_t *root_lock)
{
    int ret = NETDEV_TX_BUSY;

    /* release root_lock so that other CPUs can enqueue/dequeue in the meantime */
    /* And release qdisc */
    spin_unlock(root_lock);

    HARD_TX_LOCK(dev, txq, smp_processor_id());
    if (!netif_xmit_frozen_or_stopped(txq))
        /* dev_hard_start_xmit() linearizes/segments the skb if necessary and
         * calls the driver's ndo_start_xmit() */
        ret = dev_hard_start_xmit(skb, dev, txq);
    HARD_TX_UNLOCK(dev, txq);

    spin_lock(root_lock);

    if (dev_xmit_complete(ret)) {
        /* Driver sent out skb successfully or skb was consumed */
        ret = qdisc_qlen(q);
    } else if (ret == NETDEV_TX_LOCKED) {
        /* Driver try lock failed */
        ret = handle_dev_cpu_collision(skb, txq, q);
    } else {
        /* Driver returned NETDEV_TX_BUSY - requeue skb */
        if (unlikely(ret != NETDEV_TX_BUSY))
            net_warn_ratelimited("BUG %s code %d qlen %d\n",
                                 dev->name, ret, q->q.qlen);

        ret = dev_requeue_skb(skb, q);
    }

    if (ret && netif_xmit_frozen_or_stopped(txq))
        ret = 0;

    return ret;
}
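dev_hard_start_xmit() is where the stack finally enters the NIC driver. Heavily condensed, it boils down to the following (GSO segmentation, vlan insertion, checksum fallback and packet-tap delivery are omitted; treat this as a sketch of the non-GSO path rather than the full function):

/* Heavily condensed sketch of dev_hard_start_xmit(), non-GSO path only. */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                        struct netdev_queue *txq)
{
    const struct net_device_ops *ops = dev->netdev_ops;
    int rc = NETDEV_TX_OK;

    /* ... packet taps (ptype_all), vlan insertion, GSO segmentation ... */

    /* drivers that cannot take fragmented skbs get a linearized copy first */
    if (skb_needs_linearize(skb, netif_skb_features(skb)) &&
        __skb_linearize(skb)) {
        kfree_skb(skb);       /* allocation failed: drop the packet */
        return rc;
    }

    rc = ops->ndo_start_xmit(skb, dev); /* hand the frame to the driver */
    if (rc == NETDEV_TX_OK)
        txq_trans_update(txq);          /* refresh trans_start for the tx watchdog */
    return rc;
}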
1.4 The __qdisc_run function (traffic-control loop)
void __qdisc_run(struct Qdisc *q)
{
    int quota = weight_p; /* quota: at most weight_p packets per run (default 64) */

    /* qdisc_restart() dequeues one skb and transmits it via sch_direct_xmit() */
    while (qdisc_restart(q)) {
        /*
         * Ordered by possible occurrence: Postpone processing if
         * 1. we've exceeded packet quota
         * 2. another process needs the CPU;
         */
        if (--quota <= 0 || need_resched()) {
            /* quota exhausted or preemption needed: defer the remaining packets
             * to the NET_TX_SOFTIRQ, where net_tx_action() will send them */
            __netif_schedule(q);
            break;
        }
    }

    qdisc_run_end(q);
}
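qdisc_restart(), called in the loop above, is what ties the qdisc back to sch_direct_xmit(): it dequeues one skb and transmits it. In this kernel generation it looks roughly like this (net/sched/sch_generic.c):

static inline int qdisc_restart(struct Qdisc *q)
{
    struct netdev_queue *txq;
    struct net_device *dev;
    spinlock_t *root_lock;
    struct sk_buff *skb;

    /* dequeue one packet (also handles the requeued gso_skb slot) */
    skb = dequeue_skb(q);
    if (unlikely(!skb))
        return 0; /* queue empty: stop the loop in __qdisc_run() */

    root_lock = qdisc_lock(q);
    dev = qdisc_dev(q);
    txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

    /* returns the remaining queue length, so a non-zero value keeps the loop going */
    return sch_direct_xmit(skb, q, dev, txq, root_lock);
}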
1.5 The net_tx_action function
Under heavy load packets cannot always be sent directly and go through the qdisc. If one __qdisc_run() pass cannot drain the queue (quota exhausted or preemption needed), the remaining work is deferred to a softirq.
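The softirq is requested by __netif_schedule(), called from __qdisc_run() above: it chains the qdisc onto the per-CPU output_queue and raises NET_TX_SOFTIRQ. In this kernel generation it looks roughly like this (net/core/dev.c):

static inline void __netif_reschedule(struct Qdisc *q)
{
    struct softnet_data *sd;
    unsigned long flags;

    local_irq_save(flags);
    sd = &__get_cpu_var(softnet_data);
    q->next_sched = NULL;
    *sd->output_queue_tailp = q;          /* append to the per-CPU output_queue */
    sd->output_queue_tailp = &q->next_sched;
    raise_softirq_irqoff(NET_TX_SOFTIRQ); /* net_tx_action() will run later */
    local_irq_restore(flags);
}

void __netif_schedule(struct Qdisc *q)
{
    /* schedule each qdisc at most once until net_tx_action() clears the bit */
    if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
        __netif_reschedule(q);
}

net_tx_action() is the handler registered for NET_TX_SOFTIRQ: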
static void net_tx_action(struct softirq_action *h)
{
    struct softnet_data *sd = &__get_cpu_var(softnet_data);

    /* skbs freed with dev_kfree_skb_irq() are chained onto completion_queue */
    if (sd->completion_queue) {
        struct sk_buff *clist;

        /* detach the list, then free the skbs with interrupts enabled */
        local_irq_disable();
        clist = sd->completion_queue;
        sd->completion_queue = NULL;
        local_irq_enable();

        while (clist) {
            struct sk_buff *skb = clist;
            clist = clist->next;

            WARN_ON(atomic_read(&skb->users));
            trace_kfree_skb(skb, net_tx_action);
            __kfree_skb(skb);
        }
    }

    /* __netif_schedule() chains qdiscs onto output_queue */
    if (sd->output_queue) {
        struct Qdisc *head;

        local_irq_disable();
        head = sd->output_queue;
        sd->output_queue = NULL;
        sd->output_queue_tailp = &sd->output_queue;
        local_irq_enable();

        while (head) {
            struct Qdisc *q = head;
            spinlock_t *root_lock;

            head = head->next_sched;

            root_lock = qdisc_lock(q);
            if (spin_trylock(root_lock)) {
                smp_mb__before_clear_bit();
                clear_bit(__QDISC_STATE_SCHED, &q->state);
                qdisc_run(q); /* dequeue and transmit through the qdisc */
                spin_unlock(root_lock);
            } else {
                if (!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)) {
                    /* root_lock is held elsewhere: reschedule the softirq */
                    __netif_reschedule(q);
                } else {
                    smp_mb__before_clear_bit();
                    clear_bit(__QDISC_STATE_SCHED, &q->state);
                }
            }
        }
    }
}
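For completeness, the completion_queue handled at the top of net_tx_action() is filled by dev_kfree_skb_irq(), which drivers use to free skbs from interrupt context; it defers the actual __kfree_skb() to the softirq. Roughly (net/core/dev.c):

void dev_kfree_skb_irq(struct sk_buff *skb)
{
    if (atomic_dec_and_test(&skb->users)) {
        struct softnet_data *sd;
        unsigned long flags;

        local_irq_save(flags);
        sd = &__get_cpu_var(softnet_data);
        skb->next = sd->completion_queue;     /* chain onto the per-CPU list */
        sd->completion_queue = skb;
        raise_softirq_irqoff(NET_TX_SOFTIRQ); /* freed later in net_tx_action() */
        local_irq_restore(flags);
    }
}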