Linux内核网络源码分析——发送数据

原文地址: http://www.penna.cn/blog/?p=218

UDP发送:
|       sys_write               fs/read_write.c
|       sock_writev             net/socket.c                    
|       sock_sendmsg            net/socket.c
|       inet_sendmsg            net/ipv4/af_inet.c
|       udp_sendmsg             net/ipv4/udp.c
|       ip_build_xmit           net/ipv4/ip_output.c
|       output_maybe_reroute    net/ipv4/ip_output.c
|       ip_output               net/ipv4/ip_output.c
|       ip_finish_output        net/ipv4/ip_output.c
|       dev_queue_xmit          net/core/dev.c
|       ——————————————–
|       el3_start_xmit          drivers/net/3c509.c
V

write()
e.g. write(sockfd,"Hello",strlen("Hello"));
user
————————————————–
kernel

sys_write() <fs/read_write.c>
asmlinkage ssize_t sys_write(unsigned int fd,const char __user * buf,size_t count)

ret = vfs_write(file,buf,count,&pos);

vfs_write()

if (file->f_op->write)
ret = file->f_op->write(file,buf,count,pos);

//在前面建立socket时sock_map_fd将sock对应file的f_op定义为socket_file_ops,参见:
static const struct file_operations socket_file_ops ={

.aio_write = sock_aio_write,

}
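sock_aio_write()//与之前的版本不同:socket_file_ops没有定义.write,所以vfs_write()实际走的是else分支do_sync_write(),再由它调用f_op->aio_write,即sock_aio_write()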
do_sock_write()
__sock_sendmsg()

static inline int __sock_sendmsg(struct kiocb *iocb,struct socket *sock,struct msghdr *msg,size_t size)

return sock->ops->sendmsg(iocb,sock,msg,size);

//sock->ops在inet_create函数中被初始化,参见:
inet_create() <net/ipv4/af_inet.c:>
static struct inet_protosw inetsw_array[] <net/ipv4/af_inet.c:>
<net/ipv4/af_inet.c>
const struct proto_ops inet_stream_ops ={

.sendmsg = tcp_sendmsg,

}
<net/ipv4/af_inet.c>
const struct proto_ops inet_dgram_ops ={

.sendmsg = inet_sendmsg,

}
————————————————–
UDP
inet_sendmsg(…)
int inet_sendmsg(struct kiocb *iocb,struct socket *sock,struct msghdr *msg,size_t size)

return sk->sk_prot->sendmsg(iocb,sk,msg,size);


udp_sendmsg(…)
ip_route_output_flow()
这里进行路由!参见5、路由和ARP

ip_append_data()
* ip_append_data() and ip_append_page() can make one large IP datagram
* from many pieces of data. Each pieces will be holded on the socket
* until ip_push_pending_frames() is called. Each piece can be a page
* or non-page data.
*
* Not only UDP,other transport protocols - e.g. raw sockets - can use
* this interface potentially.
*
* LATER:length must be adjusted by pad at tail,when it is required.
udp_push_pending_frames()

udp_push_pending_frames()
* Push out all pending data as one UDP datagram. Socket is locked.

————————————————–
TCP
tcp_transmit_skb()
err = icsk->icsk_af_ops->queue_xmit(skb,0);

tcp_transmit_skb 引用表:
tcp_mtu_probe
tcp_write_xmit
tcp_push_one
tcp_retransmit_skb
tcp_send_active_reset
tcp_send_synack
tcp_connect
tcp_send_ack
tcp_xmit_probe_skb
tcp_write_wakeup
 

ip_queue_xmit() ip_send_reply() ip_build_and_send_pkt()
int ip_queue_xmit(struct sk_buff *skb,int ipfragok)
这里有route过程
ip_route_output_flow(…)

*dccp int ip_build_and_send_pkt(struct sk_buff *skb,struct sock *sk,__be32 saddr,__be32 daddr,struct ip_options *opt)
Add an ip header to a skbuff and send it out.

void ip_send_reply(struct sock *sk,struct sk_buff *skb,struct ip_reply_arg *arg,unsigned int len) <net/ipv4/ip_output.c>
* Generic function to send a packet as reply to another packet.
* Used to send TCP resets so far. ICMP should use this function too.

* Should run single threaded per socket because it uses the sock
* structure to pass arguments.
这里有用到ip_route_output_key()进行路由。

int ip_push_pending_frames(struct sock *sk);
Combined all pending IP fragments on the socket as one IP datagram
and push them out.
ip_local_out();

————————————————–
IP

ip_push_pending_frames()

ip_local_out()
 <net/ipv4/ip_output.c>
int ip_local_out(struct sk_buff *skb)

{
int err;

err = __ip_local_out(skb);
if (likely(err == 1))
err = dst_output(skb);

return err;
}
EXPORT_SYMBOL_GPL(ip_local_out);

__ip_local_out() <net/ipv4/ip_output.c>
int __ip_local_out(struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);

iph->tot_len = htons(skb->len);
ip_send_check(iph);
return nf_hook(PF_INET,NF_INET_LOCAL_OUT,skb,NULL,skb->dst->dev,
dst_output);
}

dst_output() <include/net/dst.h>

static inline int dst_output(struct sk_buff *skb)
{
return skb->dst->output(skb);
}

其中dst->output() = ip_output();在__mkroute_output()和__mkroute_input()中注册。

ip_output() <net/ipv4/ip_output.c>

return NF_HOOK_COND(PF_INET,NF_INET_POST_ROUTING,skb,NULL,dev,
ip_finish_output,
!(IPCB(skb)->flags &IPSKB_REROUTED));
 

ip_finish_output() <net/ipv4/ip_output.c>

static int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) &&defined(CONFIG_XFRM)

if (skb->dst->xfrm != NULL){
IPCB(skb)->flags |= IPSKB_REROUTED;
return dst_output(skb);
}
#endif
if (skb->len >ip_skb_dst_mtu(skb) &&!skb_is_gso(skb))
return ip_fragment(skb,ip_finish_output2);
else
return ip_finish_output2(skb);
}
 

ip_finish_output2() <net/ipv4/ip_output.c>


if (dst->hh)
         return neigh_hh_output(dst->hh,skb);
else if (dst->neighbour)
         return dst->neighbour->output(skb);

// 此函数在neigh_alloc中注册为neigh_blackhole(),但这个是默认的,一般会被替换掉

static struct neighbour *neigh_alloc(struct neigh_table *tbl)

n->output = neigh_blackhole;

// 此函数在arp_constructor中注册为ops->queue_xmit或ops->output或connected_output

static int arp_constructor(struct neighbour *neigh)

.output = neigh_resolve_output,
.connected_output = neigh_connected_output,
.queue_xmit = dev_queue_xmit,

// 故一般为neigh_resolve_output

neigh_resolve_output() <net/core/neighbour.c>


err = dev_hard_header(skb,dev,ntohs(skb->protocol),neigh->ha,NULL,skb->len);
// 这里有Mac头填充的动作,参见路由和ARP
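// 说明此前neigh->ha已获取。如果neigh->ha为空(邻居尚未解析)呢?neigh_resolve_output()在填充Mac头之前会先调用neigh_event_send():
// 若邻居项尚未解析,skb会被挂到neigh->arp_queue上并触发ARP请求,待收到应答后再发送;只有邻居已解析时才会走到这里。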

if (err >= 0)
        rc = neigh->ops->queue_xmit(skb);// 此函数注册为dev_queue_xmit() 

dev_queue_xmit() <net/core/dev.c>

int dev_queue_xmit(struct sk_buff *skb)


if (!netif_queue_stopped(dev) &&
    !netif_subqueue_stopped(dev,skb)){
    rc = 0;
    if (!dev_hard_start_xmit(skb,dev)){
        HARD_TX_UNLOCK(dev);
        goto out;
    }
}

dev_hard_start_xmit() <net/core/dev.c>

int dev_hard_start_xmit(struct sk_buff *skb,struct net_device *dev)


return dev->hard_start_xmit(skb,dev);

xxx_start_xmit() <drivers/net/xxx.c>

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值