Jack:我想知道用户如何把数据发送到内核空间的?
我:你觉得哪里比较难理解呢?
Jack:一般程序员会在程序里通过socket()调用获得一个文件描述符,然后通过write把定义好的字符串写入到该描述符。
我:是的。你有什么不明白的吗?
Jack:可是,我不知道这个write底层到底会做什么。
我:这个write底层会调用sock_send函数。我给你看一下这个函数的定义。
/*
 * sock_send - socket-layer entry point for sending data on a descriptor.
 *
 * fd:    descriptor supplied by the calling process
 * buff:  caller's buffer holding the data to send
 * len:   number of bytes in buff
 * flags: send flags, handed unchanged to the protocol's send routine
 *
 * Returns -EBADF when fd is out of range or has no open file, -ENOTSOCK
 * when sockfd_lookup() finds no socket for fd, and otherwise whatever
 * sock->ops->send() returns.
 */
static int
sock_send(int fd, void * buff, int len, unsigned flags)
{
    struct socket *sock;
    struct file *file;

    DPRINTF((net_debug,
        "NET: sock_send(fd = %d, buff = %X, len = %d, flags = %X)\n",
        fd, buff, len, flags));

    /* The assignment inside the condition also fetches the file entry. */
    if (fd < 0 || fd >= NR_OPEN || ((file = current->filp[fd]) == NULL))
        return(-EBADF);
    if (!(sock = sockfd_lookup(fd, NULL)))
        return(-ENOTSOCK);

    /* Only the O_NONBLOCK bit of the file flags is forwarded. */
    return(sock->ops->send(sock, buff, len, (file->f_flags & O_NONBLOCK), flags));
}
sock_send函数通过用户传入的socket描述符fd找到对应的struct socket结构,然后把该socket结构(sock)、buff(这是一个逻辑地址)、len、从文件flag中取出的非阻塞标志以及flags一起传入下层的对应函数。
最后一个语句return调用了一个函数指针(这就是函数指针的妙处!)。如果下面对应的传输层协议是UDP协议,这个函数指针最终就会调用到udp_write。
/*
 * udp_write - send on a UDP socket without an explicit destination.
 *
 * Forwards every argument to udp_sendto() with a NULL address and a zero
 * address length, and returns that call's result unchanged.
 */
static int
udp_write(struct sock *sk, unsigned char *buff, int len, int noblock,
          unsigned flags)
{
    int result;

    result = udp_sendto(sk, buff, len, noblock, flags, NULL, 0);
    return result;
}
udp_write其实只是一个包裹函数,干活儿的是udp_sendto。
static int
udp_sendto(struct sock *sk, unsigned char *from, int len, int noblock,
unsigned flags, struct sockaddr_in *usin, int addr_len)
{
struct sockaddr_in sin;
int tmp;
int err;
DPRINTF((DBG_UDP, "UDP: sendto(len=%d, flags=%X)
", len, flags));
/* Check the flags. */
if (flags)
return(-EINVAL);
if (len < 0)
return(-EINVAL);
if (len == 0)
return(0);
/* Get and verify the address. */
if (usin) {
if (addr_len < sizeof(sin)) return(-EINVAL);
err=verify_area(VERIFY_READ, usin, sizeof(sin));
if(err)
return err;
memcpy_fromfs(&sin, usin, sizeof(sin));
if (sin.sin_family && sin.sin_family != AF_INET)
return(-EINVAL);
if (sin.sin_port == 0)
return(-EINVAL);
} else {
if (sk->state != TCP_ESTABLISHED) return(-EINVAL);
sin.sin_family = AF_INET;
sin.sin_port = sk->dummy_th.dest;
sin.sin_addr.s_addr = sk->daddr;
}
if(!sk->broadcast && chk_addr(sin.sin_addr.s_addr)==IS_BROADCAST)
return -EACCES;/* Must turn broadcast on first */
sk->inuse = 1;
/* Send the packet. */
tmp = udp_send(sk, &sin, from, len);
/* The datagram has been sent off. Release the socket. */
release_sock(sk);
return(tmp);
}
这其实也是一个包裹函数,真正干活的是udp_send函数。
/*
 * udp_send - build one UDP datagram from user data and queue it for sending.
 *
 * sk:   sending socket
 * sin:  destination address and port
 * from: user-space payload (checked with verify_area(), copied with
 *       memcpy_fromfs())
 * len:  payload length in bytes
 *
 * Returns len on success, the verify_area() error if the payload is not
 * readable, -ENOMEM if no buffer could be allocated, the negative value
 * from build_header() if header construction fails, or -EMSGSIZE if the
 * datagram is too large.
 */
static int
udp_send(struct sock *sk, struct sockaddr_in *sin,
         unsigned char *from, int len)
{
    struct sk_buff *skb;
    struct device *dev;
    struct udphdr *uh;
    unsigned char *buff;
    unsigned long saddr;
    int size, tmp;
    int err;

    DPRINTF((DBG_UDP, "UDP: send(dst=%s:%d buff=%X len=%d)\n",
        in_ntoa(sin->sin_addr.s_addr), ntohs(sin->sin_port),
        from, len));

    err = verify_area(VERIFY_READ, from, len);
    if (err)
        return(err);

    /* Allocate a copy of the packet. */
    /* NOTE(review): len is only bounded after this allocation - confirm callers cap it. */
    size = sizeof(struct sk_buff) + sk->prot->max_header + len;
    skb = sk->prot->wmalloc(sk, size, 0, GFP_KERNEL);
    if (skb == NULL)
        return(-ENOMEM);

    skb->mem_addr = skb;
    skb->mem_len = size;
    skb->sk = NULL;    /* to avoid changing sk->saddr */
    skb->free = 1;
    skb->arp = 0;

    /* Now build the IP and MAC header. */
    buff = skb->data;
    saddr = 0;
    dev = NULL;
    DPRINTF((DBG_UDP, "UDP: >> IP_Header: %X -> %X dev=%X prot=%X len=%d\n",
        saddr, sin->sin_addr.s_addr, dev, IPPROTO_UDP, skb->mem_len));
    tmp = sk->prot->build_header(skb, saddr, sin->sin_addr.s_addr,
        &dev, IPPROTO_UDP, sk->opt, skb->mem_len, sk->ip_tos, sk->ip_ttl);
    skb->sk = sk;    /* So memory is freed correctly */
    if (tmp < 0) {
        sk->prot->wfree(sk, skb->mem_addr, skb->mem_len);
        return(tmp);
    }

    /* tmp is the header length written so far; skip past it. */
    buff += tmp;
    saddr = dev->pa_addr;
    DPRINTF((DBG_UDP, "UDP: >> MAC+IP len=%d\n", tmp));

    skb->len = tmp + sizeof(struct udphdr) + len;    /* len + UDP + IP + MAC */
    skb->dev = dev;

#ifdef OLD
    /*
     * This code used to hack in some form of fragmentation.
     * I removed that, since it didn't work anyway, and it made the
     * code a bad thing to read and understand. -FvK
     */
    if (len > dev->mtu)
#else
    if (skb->len > 4095)
#endif
    {
        printk("UDP: send: length %d > mtu %d (ignored)\n", len, dev->mtu);
        sk->prot->wfree(sk, skb->mem_addr, skb->mem_len);
        return(-EMSGSIZE);
    }

    /* Fill in the UDP header. */
    uh = (struct udphdr *) buff;
    uh->len = htons(len + sizeof(struct udphdr));
    uh->source = sk->dummy_th.source;
    uh->dest = sin->sin_port;
    buff = (unsigned char *) (uh + 1);

    /* Copy the user data. */
    memcpy_fromfs(buff, from, len);

    /* Set up the UDP checksum. */
    udp_send_check(uh, saddr, sin->sin_addr.s_addr, skb->len - tmp, sk);

    /* Send the datagram to the interface. */
    sk->prot->queue_xmit(sk, dev, skb, 1);

    return(len);
}
这个函数里真正干活的是memcpy_fromfs函数,执行完了这个函数,数据就已经从用户空间拷贝到内核空间了。
之后的sk->prot->queue_xmit(sk, dev, skb, 1);通过函数指针把sk上的skb这个数据包排入发送队列。